1 /* Parsing FTP `ls' output.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
44 #include "convert.h" /* for html_quote_string prototype */
45 #include "retr.h" /* for output_stream */
47 /* Converts symbolic permissions to number-style ones, e.g. string
48 rwxr-xr-x to 755. For now, it knows nothing of
49 setuid/setgid/sticky. ACLs are ignored. */
51 symperms (const char *s)
57 for (i = 0; i < 3; i++, s += 3)
60 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
61 (s[2] == 'x' || s[2] == 's'));
67 /* Cleans a line of text so that it can be consistently parsed. Destroys
68 <CR> and <LF> in case they occur at the end of the line and
69 replaces all <TAB> characters with <SPACE>. Returns the length of the
72 clean_line(char *line)
74 int len = strlen (line);
76 if (line[len - 1] == '\n')
79 if (line[len - 1] == '\r')
81 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
85 /* Convert the Un*x-ish style directory listing stored in FILE to a
86 linked list of fileinfo (system-independent) entries. The contents
87 of FILE are considered to be produced by the standard Unix `ls -la'
88 output (whatever that might be). BSD (no group) and SYSV (with
89 group) listings are handled.
91 The time stamps are stored in a separate variable, time_t
92 compatible (I hope). The timezones are ignored. */
93 static struct fileinfo *
94 ftp_parse_unix_ls (const char *file, int ignore_perms)
97 static const char *months[] = {
98 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
99 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
101 int next, len, i, error, ignore;
102 int year, month, day; /* for time analysis */
104 struct tm timestruct, *tnow;
107 char *line, *tok, *ptok; /* tokenizer */
108 struct fileinfo *dir, *l, cur; /* list creation */
110 fp = fopen (file, "rb");
113 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
118 /* Line loop to end of file: */
119 while ((line = read_whole_line (fp)) != NULL)
121 len = clean_line (line);
122 /* Skip if total... */
123 if (!strncasecmp (line, "total", 5))
128 /* Get the first token (permissions). */
129 tok = strtok (line, " ");
139 /* Decide whether we deal with a file or a directory. */
143 cur.type = FT_PLAINFILE;
144 DEBUGP (("PLAINFILE; "));
147 cur.type = FT_DIRECTORY;
148 DEBUGP (("DIRECTORY; "));
151 cur.type = FT_SYMLINK;
152 DEBUGP (("SYMLINK; "));
155 cur.type = FT_UNKNOWN;
156 DEBUGP (("UNKNOWN; "));
171 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
174 DEBUGP (("implicit perms %0o; ", cur.perms));
178 cur.perms = symperms (tok + 1);
179 DEBUGP (("perms %0o; ", cur.perms));
182 error = ignore = 0; /* Erroneous and ignoring entries are
183 treated equally for now. */
184 year = hour = min = sec = 0; /* Silence the compiler. */
187 /* While there are tokens on the line, parse them. Next is the
188 number of tokens left until the filename.
190 Use the month-name token as the "anchor" (the place where the
191 position wrt the file name is "known"). When a month name is
192 encountered, `next' is set to 5. Also, the preceding
193 characters are parsed to get the file size.
195 This tactic is quite dubious when it comes to
196 internationalization issues (non-English month names), but it
200 (tok = strtok (NULL, " ")) != NULL)
203 if (next < 0) /* a month name was not encountered */
205 for (i = 0; i < 12; i++)
206 if (!strcmp (tok, months[i]))
208 /* If we got a month, it means the token before it is the
209 size, and the filename is three tokens away. */
214 /* Parse the previous token with str_to_wgint. */
217 /* Something has gone wrong during parsing. */
222 size = str_to_wgint (ptok, NULL, 10);
223 if (size == WGINT_MAX && errno == ERANGE)
224 /* Out of range -- ignore the size. #### Should
225 we refuse to start the download. */
229 DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
233 DEBUGP (("month: %s; ", months[month]));
236 else if (next == 4) /* days */
238 if (tok[1]) /* two-digit... */
239 day = 10 * (*tok - '0') + tok[1] - '0';
240 else /* ...or one-digit */
242 DEBUGP (("day: %d; ", day));
246 /* This ought to be either the time, or the year. Let's
249 If we have a number x, it's a year. If we have x:y,
250 it's hours and minutes. If we have x:y:z, z are
253 min = hour = sec = 0;
254 /* We must deal with digits. */
255 if (c_isdigit (*tok))
257 /* Suppose it's year. */
258 for (; c_isdigit (*tok); tok++)
259 year = (*tok - '0') + 10 * year;
262 /* This means these were hours! */
266 /* Get the minutes... */
267 for (; c_isdigit (*tok); tok++)
268 min = (*tok - '0') + 10 * min;
271 /* ...and the seconds. */
273 for (; c_isdigit (*tok); tok++)
274 sec = (*tok - '0') + 10 * sec;
279 DEBUGP (("year: %d (no tm); ", year));
281 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
283 else if (next == 2) /* The file name */
288 /* Since the file name may contain a SPC, it is possible
289 for strtok to handle it wrong. */
290 fnlen = strlen (tok);
291 if (fnlen < len - (tok - line))
293 /* So we have a SPC in the file name. Restore the
296 /* If the file is a symbolic link, it should have a
298 if (cur.type == FT_SYMLINK)
300 p = strstr (tok, " -> ");
306 cur.linkto = xstrdup (p + 4);
307 DEBUGP (("link to: %s\n", cur.linkto));
308 /* And separate it from the file name. */
312 /* If we have the filename, add it to the list of files or
314 /* "." and ".." are an exception! */
315 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
317 DEBUGP (("\nIgnoring `.' and `..'; "));
321 /* Some FTP sites choose to have ls -F as their default
322 LIST output, which marks the symlinks with a trailing
323 `@', directory names with a trailing `/' and
324 executables with a trailing `*'. This is no problem
325 unless encountering a symbolic link ending with `@',
326 or an executable ending with `*' on a server without
327 default -F output. I believe these cases are very
329 fnlen = strlen (tok); /* re-calculate `fnlen' */
330 cur.name = xmalloc (fnlen + 1);
331 memcpy (cur.name, tok, fnlen + 1);
334 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
336 cur.name[fnlen - 1] = '\0';
337 DEBUGP (("trailing `/' on dir.\n"));
339 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
341 cur.name[fnlen - 1] = '\0';
342 DEBUGP (("trailing `@' on link.\n"));
344 else if (cur.type == FT_PLAINFILE
345 && (cur.perms & 0111)
346 && cur.name[fnlen - 1] == '*')
348 cur.name[fnlen - 1] = '\0';
349 DEBUGP (("trailing `*' on exec.\n"));
360 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
363 DEBUGP (("%s\n", cur.name ? cur.name : ""));
367 DEBUGP (("Skipping.\n"));
368 xfree_null (cur.name);
369 xfree_null (cur.linkto);
376 l = dir = xnew (struct fileinfo);
377 memcpy (l, &cur, sizeof (cur));
378 l->prev = l->next = NULL;
383 l->next = xnew (struct fileinfo);
385 memcpy (l, &cur, sizeof (cur));
388 /* Get the current time. */
389 timenow = time (NULL);
390 tnow = localtime (&timenow);
391 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
392 timestruct.tm_sec = sec;
393 timestruct.tm_min = min;
394 timestruct.tm_hour = hour;
395 timestruct.tm_mday = day;
396 timestruct.tm_mon = month;
399 /* Some listings will not specify the year if it is "obvious"
400 that the file was from the previous year. E.g. if today
401 is 97-01-12, and you see a file of Dec 15th, its year is
402 1996, not 1997. Thanks to Vladimir Volovich for
404 if (month > tnow->tm_mon)
405 timestruct.tm_year = tnow->tm_year - 1;
407 timestruct.tm_year = tnow->tm_year;
410 timestruct.tm_year = year;
411 if (timestruct.tm_year >= 1900)
412 timestruct.tm_year -= 1900;
413 timestruct.tm_wday = 0;
414 timestruct.tm_yday = 0;
415 timestruct.tm_isdst = -1;
416 l->tstamp = mktime (&timestruct); /* store the time-stamp */
425 static struct fileinfo *
426 ftp_parse_winnt_ls (const char *file)
430 int year, month, day; /* for time analysis */
432 struct tm timestruct;
434 char *line, *tok; /* tokenizer */
435 struct fileinfo *dir, *l, cur; /* list creation */
437 fp = fopen (file, "rb");
440 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
445 /* Line loop to end of file: */
446 while ((line = read_whole_line (fp)) != NULL)
448 len = clean_line (line);
450 /* Extracting name is a bit of black magic and we have to do it
451 before `strtok' inserts extra \0 characters into the line
452 string. For the moment let us just suppose that the name starts at
453 column 39 of the listing. This way we could also recognize
454 filenames that begin with a series of space characters (but who
455 really wants to use such filenames anyway?). */
456 if (len < 40) continue;
458 cur.name = xstrdup(tok);
459 DEBUGP(("Name: '%s'\n", cur.name));
461 /* First column: mm-dd-yy. Should atoi() on the month fail, January
463 tok = strtok(line, "-");
464 if (tok == NULL) continue;
465 month = atoi(tok) - 1;
466 if (month < 0) month = 0;
467 tok = strtok(NULL, "-");
468 if (tok == NULL) continue;
470 tok = strtok(NULL, " ");
471 if (tok == NULL) continue;
473 /* Assuming the epoch starting at 1.1.1970 */
474 if (year <= 70) year += 100;
476 /* Second column: hh:mm[AP]M, listing does not contain value for
478 tok = strtok(NULL, ":");
479 if (tok == NULL) continue;
481 tok = strtok(NULL, "M");
482 if (tok == NULL) continue;
484 /* Adjust hour from AM/PM. Just for the record, the sequence goes
485 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
487 if (hour == 12) hour = 0;
488 if (*tok == 'P') hour += 12;
490 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
491 year+1900, month, day, hour, min));
493 /* Build the time-stamp (copy & paste from above) */
494 timestruct.tm_sec = 0;
495 timestruct.tm_min = min;
496 timestruct.tm_hour = hour;
497 timestruct.tm_mday = day;
498 timestruct.tm_mon = month;
499 timestruct.tm_year = year;
500 timestruct.tm_wday = 0;
501 timestruct.tm_yday = 0;
502 timestruct.tm_isdst = -1;
503 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
505 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
507 /* Third column: Either file length, or <DIR>. We also set the
508 permissions (guessed as 0644 for plain files and 0755 for
509 directories as the listing does not give us a clue) and filetype
511 tok = strtok(NULL, " ");
512 if (tok == NULL) continue;
513 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
514 if (tok == NULL) continue;
517 cur.type = FT_DIRECTORY;
520 DEBUGP(("Directory\n"));
525 cur.type = FT_PLAINFILE;
527 size = str_to_wgint (tok, NULL, 10);
528 if (size == WGINT_MAX && errno == ERANGE)
529 cur.size = 0; /* overflow */
533 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
538 /* And put everything into the linked list */
541 l = dir = xnew (struct fileinfo);
542 memcpy (l, &cur, sizeof (cur));
543 l->prev = l->next = NULL;
548 l->next = xnew (struct fileinfo);
550 memcpy (l, &cur, sizeof (cur));
561 /* Converts VMS symbolic permissions to number-style ones, e.g. string
562 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
563 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
565 vmsperms (const char *s)
572 case ',': perms <<= 3; break;
573 case 'R': perms |= 4; break;
574 case 'W': perms |= 2; break;
575 case 'D': perms |= 2; break;
576 case 'E': perms |= 1; break;
577 default: DEBUGP(("wrong VMS permissons!\n"));
585 static struct fileinfo *
586 ftp_parse_vms_ls (const char *file)
589 /* #### A third copy of more-or-less the same array ? */
590 static const char *months[] = {
591 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
592 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
595 int year, month, day; /* for time analysis */
597 struct tm timestruct;
599 char *line, *tok; /* tokenizer */
600 struct fileinfo *dir, *l, cur; /* list creation */
602 fp = fopen (file, "rb");
605 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
610 /* Skip empty line. */
611 line = read_whole_line (fp);
614 /* Skip "Directory PUB$DEVICE[PUB]" */
615 line = read_whole_line (fp);
618 /* Skip empty line. */
619 line = read_whole_line (fp);
622 /* Line loop to end of file: */
623 while ((line = read_whole_line (fp)) != NULL)
626 i = clean_line (line);
633 /* First column: Name. A bit of black magic again. The name may be
634 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
635 line. Therefore we will first try to get the complete name
636 until the first space character; if it fails, we assume that the name
637 occupies the whole line. After that we search for the version
638 separator ";", we remove it and check the extension of the file;
639 extension .DIR denotes directory. */
641 tok = strtok(line, " ");
642 if (tok == NULL) tok = line;
643 DEBUGP(("file name: '%s'\n", tok));
644 for (p = tok ; *p && *p != ';' ; p++)
646 if (*p == ';') *p = '\0';
647 p = tok + strlen(tok) - 4;
648 if (!strcmp(p, ".DIR")) *p = '\0';
649 cur.name = xstrdup(tok);
650 DEBUGP(("Name: '%s'\n", cur.name));
652 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
653 the file size to zero as the listing does tell us only the size in
654 filesystem blocks - for an integrity check (when mirroring, for
655 example) we would need the size in bytes. */
659 cur.type = FT_DIRECTORY;
661 DEBUGP(("Directory\n"));
665 cur.type = FT_PLAINFILE;
671 /* Second column, if it exists, or the first column of the next line
672 contains the file size in blocks. We will skip it. */
674 tok = strtok(NULL, " ");
677 DEBUGP(("Getting additional line\n"));
679 line = read_whole_line (fp);
682 DEBUGP(("empty line read, leaving listing parser\n"));
685 i = clean_line (line);
688 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
692 tok = strtok(line, " ");
694 DEBUGP(("second token: '%s'\n", tok));
696 /* Third/Second column: Date DD-MMM-YYYY. */
698 tok = strtok(NULL, "-");
699 if (tok == NULL) continue;
700 DEBUGP(("day: '%s'\n",tok));
702 tok = strtok(NULL, "-");
705 /* If the server produces garbage like
706 'EA95_0PS.GZ;1 No privilege for attempted operation'
707 the first strtok(NULL, "-") will return everything until the end
708 of the line and only the next strtok() call will return NULL. */
709 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
713 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
714 /* Unknown months are mapped to January */
716 tok = strtok (NULL, " ");
717 if (tok == NULL) continue;
718 year = atoi (tok) - 1900;
719 DEBUGP(("date parsed\n"));
721 /* Fourth/Third column: Time hh:mm[:ss] */
722 tok = strtok (NULL, " ");
723 if (tok == NULL) continue;
727 for (; *p && *p != ':'; ++p)
731 for (; *p && *p != ':'; ++p)
736 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
737 year+1900, month, day, hour, min, sec));
739 /* Build the time-stamp (copy & paste from above) */
740 timestruct.tm_sec = sec;
741 timestruct.tm_min = min;
742 timestruct.tm_hour = hour;
743 timestruct.tm_mday = day;
744 timestruct.tm_mon = month;
745 timestruct.tm_year = year;
746 timestruct.tm_wday = 0;
747 timestruct.tm_yday = 0;
748 timestruct.tm_isdst = -1;
749 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
751 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
753 /* Skip the fifth column */
755 tok = strtok(NULL, " ");
756 if (tok == NULL) continue;
758 /* Sixth column: Permissions */
760 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
761 if (tok == NULL) continue;
762 tok = strtok(NULL, ")");
765 DEBUGP(("confusing VMS permissions, skipping line\n"));
769 /* Permissions have the format "RWED,RWED,RE" */
770 cur.perms = vmsperms(tok);
771 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
775 /* And put everything into the linked list */
778 l = dir = xnew (struct fileinfo);
779 memcpy (l, &cur, sizeof (cur));
780 l->prev = l->next = NULL;
785 l->next = xnew (struct fileinfo);
787 memcpy (l, &cur, sizeof (cur));
799 /* This function switches between the correct parsing routine depending on
800 the SYSTEM_TYPE. The system type should be based on the result of the
801 "SYST" response of the FTP server. According to this response we will
802 use one of the three different listing parsers that cover most of the FTP
803 servers used nowadays. */
806 ftp_parse_ls (const char *file, const enum stype system_type)
811 return ftp_parse_unix_ls (file, 0);
814 /* Detect whether the listing is simulating the UNIX format */
817 fp = fopen (file, "rb");
820 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
825 /* If the first character of the file is '0'-'9', it's WINNT
827 if (c >= '0' && c <='9')
828 return ftp_parse_winnt_ls (file);
830 return ftp_parse_unix_ls (file, 1);
833 return ftp_parse_vms_ls (file);
835 return ftp_parse_unix_ls (file, 1);
837 logprintf (LOG_NOTQUIET, _("\
838 Unsupported listing type, trying Unix listing parser.\n"));
839 return ftp_parse_unix_ls (file, 0);
843 /* Stuff for creating FTP index. */
845 /* The function creates an HTML index containing references to given
846 directories and files on the appropriate host. The references are
849 ftp_index (const char *file, struct url *u, struct fileinfo *f)
853 char *htcldir; /* HTML-clean dir name */
854 char *htclfile; /* HTML-clean file name */
855 char *urlclfile; /* URL-clean file name */
859 fp = fopen (file, "wb");
862 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
870 char *tmpu, *tmpp; /* temporary, clean user and passwd */
872 tmpu = url_escape (u->user);
873 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
875 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
877 upwd = concat_strings (tmpu, "@", (char *) 0);
884 htcldir = html_quote_string (u->dir);
886 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
887 fprintf (fp, "<html>\n<head>\n<title>");
888 fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
889 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
890 fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
891 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
898 /* #### Should we translate the months? Or, even better, use
900 static const char *months[] = {
901 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
902 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
904 time_t tstamp = f->tstamp;
905 struct tm *ptm = localtime (&tstamp);
907 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
910 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
915 fprintf (fp, _("time unknown "));
919 fprintf (fp, _("File "));
922 fprintf (fp, _("Directory "));
925 fprintf (fp, _("Link "));
928 fprintf (fp, _("Not sure "));
931 htclfile = html_quote_string (f->name);
932 urlclfile = url_escape_unsafe_and_reserved (f->name);
933 fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
936 /* XXX: Should probably URL-escape dir components here, rather
937 * than just HTML-escape, for consistency with the next bit where
938 * we use urlclfile for the file component. Anyway, this is safer
939 * than what we had... */
940 fprintf (fp, "%s", htcldir);
943 fprintf (fp, "%s", urlclfile);
944 if (f->type == FT_DIRECTORY)
946 fprintf (fp, "\">%s", htclfile);
947 if (f->type == FT_DIRECTORY)
949 fprintf (fp, "</a> ");
950 if (f->type == FT_PLAINFILE)
951 fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
952 else if (f->type == FT_SYMLINK)
953 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
959 fprintf (fp, "</pre>\n</body>\n</html>\n");