1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
46 #include "convert.h" /* for html_quote_string prototype */
48 extern FILE *output_stream;
50 /* Converts symbolic permissions to number-style ones, e.g. string
51 rwxr-xr-x to 755. For now, it knows nothing of
52 setuid/setgid/sticky. ACLs are ignored. */
54 symperms (const char *s)
60 for (i = 0; i < 3; i++, s += 3)
63 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
64 (s[2] == 'x' || s[2] == 's'));
/* Cleans a line of text so that it can be consistently parsed. Destroys
<CR> and <LF> in case they occur at the end of the line and
replaces every <TAB> character with <SPACE>. Returns the length of the
cleaned line. */
75 clean_line(char *line)
77 int len = strlen (line);
79 if (line[len - 1] == '\n')
81 if (line[len - 1] == '\r')
83 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
87 /* Convert the Un*x-ish style directory listing stored in FILE to a
88 linked list of fileinfo (system-independent) entries. The contents
89 of FILE are considered to be produced by the standard Unix `ls -la'
90 output (whatever that might be). BSD (no group) and SYSV (with
91 group) listings are handled.
93 The time stamps are stored in a separate variable, time_t
94 compatible (I hope). The timezones are ignored. */
95 static struct fileinfo *
96 ftp_parse_unix_ls (const char *file, int ignore_perms)
99 static const char *months[] = {
100 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
101 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
103 int next, len, i, error, ignore;
104 int year, month, day; /* for time analysis */
106 struct tm timestruct, *tnow;
109 char *line, *tok; /* tokenizer */
110 struct fileinfo *dir, *l, cur; /* list creation */
112 fp = fopen (file, "rb");
115 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
120 /* Line loop to end of file: */
121 while ((line = read_whole_line (fp)) != NULL)
123 len = clean_line (line);
124 /* Skip if total... */
125 if (!strncasecmp (line, "total", 5))
130 /* Get the first token (permissions). */
131 tok = strtok (line, " ");
141 /* Decide whether we deal with a file or a directory. */
145 cur.type = FT_PLAINFILE;
146 DEBUGP (("PLAINFILE; "));
149 cur.type = FT_DIRECTORY;
150 DEBUGP (("DIRECTORY; "));
153 cur.type = FT_SYMLINK;
154 DEBUGP (("SYMLINK; "));
157 cur.type = FT_UNKNOWN;
158 DEBUGP (("UNKNOWN; "));
173 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
176 DEBUGP (("implicit perms %0o; ", cur.perms));
180 cur.perms = symperms (tok + 1);
181 DEBUGP (("perms %0o; ", cur.perms));
184 error = ignore = 0; /* Erroneous and ignoring entries are
185 treated equally for now. */
186 year = hour = min = sec = 0; /* Silence the compiler. */
189 /* While there are tokens on the line, parse them. Next is the
190 number of tokens left until the filename.
192 Use the month-name token as the "anchor" (the place where the
193 position wrt the file name is "known"). When a month name is
194 encountered, `next' is set to 5. Also, the preceding
195 characters are parsed to get the file size.
197 This tactic is quite dubious when it comes to
198 internationalization issues (non-English month names), but it
200 while ((tok = strtok (NULL, " ")) != NULL)
203 if (next < 0) /* a month name was not encountered */
205 for (i = 0; i < 12; i++)
206 if (!strcmp (tok, months[i]))
208 /* If we got a month, it means the token before it is the
209 size, and the filename is three tokens away. */
214 /* Back up to the beginning of the previous token
215 and parse it with str_to_wgint. */
217 while (t > line && ISDIGIT (*t))
221 /* Something has gone wrong during parsing. */
226 size = str_to_wgint (t, NULL, 10);
227 if (size == WGINT_MAX && errno == ERANGE)
228 /* Out of range -- ignore the size. #### Should
229 we refuse to start the download. */
236 DEBUGP (("month: %s; ", months[month]));
239 else if (next == 4) /* days */
241 if (tok[1]) /* two-digit... */
242 day = 10 * (*tok - '0') + tok[1] - '0';
243 else /* ...or one-digit */
245 DEBUGP (("day: %d; ", day));
/* This ought to be either the time, or the year. Let's be flexible:
if we have a number x, it's a year. If we have x:y,
it's hours and minutes. If we have x:y:z, z are
seconds. */
256 min = hour = sec = 0;
257 /* We must deal with digits. */
260 /* Suppose it's year. */
261 for (; ISDIGIT (*tok); tok++)
262 year = (*tok - '0') + 10 * year;
265 /* This means these were hours! */
269 /* Get the minutes... */
270 for (; ISDIGIT (*tok); tok++)
271 min = (*tok - '0') + 10 * min;
274 /* ...and the seconds. */
276 for (; ISDIGIT (*tok); tok++)
277 sec = (*tok - '0') + 10 * sec;
282 DEBUGP (("year: %d (no tm); ", year));
284 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
286 else if (next == 2) /* The file name */
291 /* Since the file name may contain a SPC, it is possible
292 for strtok to handle it wrong. */
293 fnlen = strlen (tok);
294 if (fnlen < len - (tok - line))
296 /* So we have a SPC in the file name. Restore the
299 /* If the file is a symbolic link, it should have a
301 if (cur.type == FT_SYMLINK)
303 p = strstr (tok, " -> ");
309 cur.linkto = xstrdup (p + 4);
310 DEBUGP (("link to: %s\n", cur.linkto));
311 /* And separate it from the file name. */
315 /* If we have the filename, add it to the list of files or
317 /* "." and ".." are an exception! */
318 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
320 DEBUGP (("\nIgnoring `.' and `..'; "));
324 /* Some FTP sites choose to have ls -F as their default
325 LIST output, which marks the symlinks with a trailing
326 `@', directory names with a trailing `/' and
327 executables with a trailing `*'. This is no problem
328 unless encountering a symbolic link ending with `@',
329 or an executable ending with `*' on a server without
default -F output. I believe these cases are very rare. */
332 fnlen = strlen (tok); /* re-calculate `fnlen' */
333 cur.name = xmalloc (fnlen + 1);
334 memcpy (cur.name, tok, fnlen + 1);
337 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
339 cur.name[fnlen - 1] = '\0';
340 DEBUGP (("trailing `/' on dir.\n"));
342 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
344 cur.name[fnlen - 1] = '\0';
345 DEBUGP (("trailing `@' on link.\n"));
347 else if (cur.type == FT_PLAINFILE
348 && (cur.perms & 0111)
349 && cur.name[fnlen - 1] == '*')
351 cur.name[fnlen - 1] = '\0';
352 DEBUGP (("trailing `*' on exec.\n"));
363 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
370 DEBUGP (("Skipping.\n"));
371 xfree_null (cur.name);
372 xfree_null (cur.linkto);
379 l = dir = xnew (struct fileinfo);
380 memcpy (l, &cur, sizeof (cur));
381 l->prev = l->next = NULL;
386 l->next = xnew (struct fileinfo);
388 memcpy (l, &cur, sizeof (cur));
391 /* Get the current time. */
392 timenow = time (NULL);
393 tnow = localtime (&timenow);
394 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
395 timestruct.tm_sec = sec;
396 timestruct.tm_min = min;
397 timestruct.tm_hour = hour;
398 timestruct.tm_mday = day;
399 timestruct.tm_mon = month;
402 /* Some listings will not specify the year if it is "obvious"
403 that the file was from the previous year. E.g. if today
404 is 97-01-12, and you see a file of Dec 15th, its year is
405 1996, not 1997. Thanks to Vladimir Volovich for
407 if (month > tnow->tm_mon)
408 timestruct.tm_year = tnow->tm_year - 1;
410 timestruct.tm_year = tnow->tm_year;
413 timestruct.tm_year = year;
414 if (timestruct.tm_year >= 1900)
415 timestruct.tm_year -= 1900;
416 timestruct.tm_wday = 0;
417 timestruct.tm_yday = 0;
418 timestruct.tm_isdst = -1;
l->tstamp = mktime (&timestruct); /* store the time-stamp */
428 static struct fileinfo *
429 ftp_parse_winnt_ls (const char *file)
433 int year, month, day; /* for time analysis */
435 struct tm timestruct;
437 char *line, *tok; /* tokenizer */
438 struct fileinfo *dir, *l, cur; /* list creation */
440 fp = fopen (file, "rb");
443 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
448 /* Line loop to end of file: */
449 while ((line = read_whole_line (fp)) != NULL)
451 len = clean_line (line);
453 /* Extracting name is a bit of black magic and we have to do it
454 before `strtok' inserted extra \0 characters in the line
455 string. For the moment let us just suppose that the name starts at
456 column 39 of the listing. This way we could also recognize
457 filenames that begin with a series of space characters (but who
458 really wants to use such filenames anyway?). */
459 if (len < 40) continue;
461 cur.name = xstrdup(tok);
462 DEBUGP(("Name: '%s'\n", cur.name));
/* First column: mm-dd-yy. Should atoi() on the month fail, January
is assumed. */
466 tok = strtok(line, "-");
467 if (tok == NULL) continue;
468 month = atoi(tok) - 1;
469 if (month < 0) month = 0;
470 tok = strtok(NULL, "-");
471 if (tok == NULL) continue;
473 tok = strtok(NULL, " ");
474 if (tok == NULL) continue;
476 /* Assuming the epoch starting at 1.1.1970 */
477 if (year <= 70) year += 100;
/* Second column: hh:mm[AP]M, listing does not contain value for
seconds. */
481 tok = strtok(NULL, ":");
482 if (tok == NULL) continue;
484 tok = strtok(NULL, "M");
485 if (tok == NULL) continue;
487 /* Adjust hour from AM/PM. Just for the record, the sequence goes
488 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
490 if (hour == 12) hour = 0;
491 if (*tok == 'P') hour += 12;
493 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
494 year+1900, month, day, hour, min));
496 /* Build the time-stamp (copy & paste from above) */
497 timestruct.tm_sec = 0;
498 timestruct.tm_min = min;
499 timestruct.tm_hour = hour;
500 timestruct.tm_mday = day;
501 timestruct.tm_mon = month;
502 timestruct.tm_year = year;
503 timestruct.tm_wday = 0;
504 timestruct.tm_yday = 0;
505 timestruct.tm_isdst = -1;
cur.tstamp = mktime (&timestruct); /* store the time-stamp */
508 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
/* Third column: Either file length, or <DIR>. We also set the
permissions (guessed as 0644 for plain files and 0755 for
directories as the listing does not give us a clue) and the file type
here. */
514 tok = strtok(NULL, " ");
515 if (tok == NULL) continue;
516 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
517 if (tok == NULL) continue;
520 cur.type = FT_DIRECTORY;
523 DEBUGP(("Directory\n"));
528 cur.type = FT_PLAINFILE;
530 size = str_to_wgint (tok, NULL, 10);
531 if (size == WGINT_MAX && errno == ERANGE)
532 cur.size = 0; /* overflow */
536 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
541 /* And put everything into the linked list */
544 l = dir = xnew (struct fileinfo);
545 memcpy (l, &cur, sizeof (cur));
546 l->prev = l->next = NULL;
551 l->next = xnew (struct fileinfo);
553 memcpy (l, &cur, sizeof (cur));
564 /* Converts VMS symbolic permissions to number-style ones, e.g. string
565 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
566 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
568 vmsperms (const char *s)
575 case ',': perms <<= 3; break;
576 case 'R': perms |= 4; break;
577 case 'W': perms |= 2; break;
578 case 'D': perms |= 2; break;
579 case 'E': perms |= 1; break;
580 default: DEBUGP(("wrong VMS permissons!\n"));
588 static struct fileinfo *
589 ftp_parse_vms_ls (const char *file)
592 /* #### A third copy of more-or-less the same array ? */
593 static const char *months[] = {
594 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
595 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
598 int year, month, day; /* for time analysis */
600 struct tm timestruct;
602 char *line, *tok; /* tokenizer */
603 struct fileinfo *dir, *l, cur; /* list creation */
605 fp = fopen (file, "rb");
608 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
613 /* Skip empty line. */
614 line = read_whole_line (fp);
617 /* Skip "Directory PUB$DEVICE[PUB]" */
618 line = read_whole_line (fp);
621 /* Skip empty line. */
622 line = read_whole_line (fp);
625 /* Line loop to end of file: */
626 while ((line = read_whole_line (fp)) != NULL)
629 i = clean_line (line);
/* First column: Name. A bit of black magic again. The name may be
637 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
638 line. Therefore we will first try to get the complete name
639 until the first space character; if it fails, we assume that the name
640 occupies the whole line. After that we search for the version
641 separator ";", we remove it and check the extension of the file;
642 extension .DIR denotes directory. */
644 tok = strtok(line, " ");
645 if (tok == NULL) tok = line;
646 DEBUGP(("file name: '%s'\n", tok));
647 for (p = tok ; *p && *p != ';' ; p++);
648 if (*p == ';') *p = '\0';
649 p = tok + strlen(tok) - 4;
650 if (!strcmp(p, ".DIR")) *p = '\0';
651 cur.name = xstrdup(tok);
652 DEBUGP(("Name: '%s'\n", cur.name));
654 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
655 the file size to zero as the listing does tell us only the size in
656 filesystem blocks - for an integrity check (when mirroring, for
657 example) we would need the size in bytes. */
661 cur.type = FT_DIRECTORY;
663 DEBUGP(("Directory\n"));
667 cur.type = FT_PLAINFILE;
673 /* Second column, if exists, or the first column of the next line
674 contain file size in blocks. We will skip it. */
676 tok = strtok(NULL, " ");
679 DEBUGP(("Getting additional line\n"));
681 line = read_whole_line (fp);
684 DEBUGP(("empty line read, leaving listing parser\n"));
687 i = clean_line (line);
690 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
694 tok = strtok(line, " ");
696 DEBUGP(("second token: '%s'\n", tok));
698 /* Third/Second column: Date DD-MMM-YYYY. */
700 tok = strtok(NULL, "-");
701 if (tok == NULL) continue;
702 DEBUGP(("day: '%s'\n",tok));
704 tok = strtok(NULL, "-");
707 /* If the server produces garbage like
708 'EA95_0PS.GZ;1 No privilege for attempted operation'
709 the first strtok(NULL, "-") will return everything until the end
710 of the line and only the next strtok() call will return NULL. */
711 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
715 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
/* Unknown months are mapped to January */
718 tok = strtok (NULL, " ");
719 if (tok == NULL) continue;
720 year = atoi (tok) - 1900;
721 DEBUGP(("date parsed\n"));
723 /* Fourth/Third column: Time hh:mm[:ss] */
724 tok = strtok (NULL, " ");
725 if (tok == NULL) continue;
729 for (; *p && *p != ':'; ++p);
732 for (; *p && *p != ':'; ++p);
736 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
737 year+1900, month, day, hour, min, sec));
739 /* Build the time-stamp (copy & paste from above) */
740 timestruct.tm_sec = sec;
741 timestruct.tm_min = min;
742 timestruct.tm_hour = hour;
743 timestruct.tm_mday = day;
744 timestruct.tm_mon = month;
745 timestruct.tm_year = year;
746 timestruct.tm_wday = 0;
747 timestruct.tm_yday = 0;
748 timestruct.tm_isdst = -1;
cur.tstamp = mktime (&timestruct); /* store the time-stamp */
751 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
753 /* Skip the fifth column */
755 tok = strtok(NULL, " ");
756 if (tok == NULL) continue;
758 /* Sixth column: Permissions */
760 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissons */
761 if (tok == NULL) continue;
762 tok = strtok(NULL, ")");
765 DEBUGP(("confusing VMS permissions, skipping line\n"));
/* Permissions have the format "RWED,RWED,RE" */
770 cur.perms = vmsperms(tok);
771 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
775 /* And put everything into the linked list */
778 l = dir = xnew (struct fileinfo);
779 memcpy (l, &cur, sizeof (cur));
780 l->prev = l->next = NULL;
785 l->next = xnew (struct fileinfo);
787 memcpy (l, &cur, sizeof (cur));
/* This function switches between the correct parsing routine depending on
the SYSTEM_TYPE. The system type should be based on the result of the
"SYST" response of the FTP server. According to this response we will
use one of the three different listing parsers that cover most of the FTP
servers used nowadays. */
806 ftp_parse_ls (const char *file, const enum stype system_type)
811 return ftp_parse_unix_ls (file, 0);
814 /* Detect whether the listing is simulating the UNIX format */
817 fp = fopen (file, "rb");
820 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
/* If the first character of the file is '0'-'9', it's WINNT
format. */
827 if (c >= '0' && c <='9')
828 return ftp_parse_winnt_ls (file);
830 return ftp_parse_unix_ls (file, 1);
833 return ftp_parse_vms_ls (file);
835 return ftp_parse_unix_ls (file, 1);
837 logprintf (LOG_NOTQUIET, _("\
838 Unsupported listing type, trying Unix listing parser.\n"));
839 return ftp_parse_unix_ls (file, 0);
843 /* Stuff for creating FTP index. */
845 /* The function creates an HTML index containing references to given
846 directories and files on the appropriate host. The references are
849 ftp_index (const char *file, struct url *u, struct fileinfo *f)
853 char *htclfile; /* HTML-clean file name */
857 fp = fopen (file, "wb");
860 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
868 char *tmpu, *tmpp; /* temporary, clean user and passwd */
870 tmpu = url_escape (u->user);
871 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
873 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
875 upwd = concat_strings (tmpu, "@", (char *) 0);
881 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
882 fprintf (fp, "<html>\n<head>\n<title>");
883 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
884 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
885 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
886 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
892 /* #### Should we translate the months? Or, even better, use
894 static const char *months[] = {
895 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
896 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
898 struct tm *ptm = localtime ((time_t *)&f->tstamp);
900 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
903 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
908 fprintf (fp, _("time unknown "));
912 fprintf (fp, _("File "));
915 fprintf (fp, _("Directory "));
918 fprintf (fp, _("Link "));
921 fprintf (fp, _("Not sure "));
924 htclfile = html_quote_string (f->name);
925 fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
928 fprintf (fp, "%s", u->dir);
931 fprintf (fp, "%s", htclfile);
932 if (f->type == FT_DIRECTORY)
934 fprintf (fp, "\">%s", htclfile);
935 if (f->type == FT_DIRECTORY)
937 fprintf (fp, "</a> ");
938 if (f->type == FT_PLAINFILE)
939 fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
940 else if (f->type == FT_SYMLINK)
941 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
946 fprintf (fp, "</pre>\n</body>\n</html>\n");