/* Parsing FTP `ls' output.
   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPL version 3 section 7

If you modify this program, or any covered work, by linking or
combining it with the OpenSSL project's OpenSSL library (or a
modified version of that library), containing parts covered by the
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
grants you additional permission to convey the resulting work.
Corresponding Source for a non-source form of such a combination
shall include the source code for the parts of OpenSSL used as well
as that of the covered work.  */
44 #include "convert.h" /* for html_quote_string prototype */
45 #include "retr.h" /* for output_stream */
/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.  S must point at the nine permission characters
   (i.e. past the file-type letter of an `ls -l' mode string); a
   string shorter than nine characters yields 0.

   Only the nine rwx bits are produced.  setuid/setgid/sticky are not
   reported as separate bits, but an `s' or `t' found in an execute
   slot still counts as "executable".  ACLs are ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  if (strlen (s) < 9)
    return 0;
  for (i = 0; i < 3; i++, s += 3)
    {
      /* One octal digit per user/group/other triplet.  */
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's' || s[2] == 't'));
    }
  return perms;
}
/* Cleans a line of text so that it can be consistently parsed.  Destroys
   <LF> and <CR> in case that they occur at the end of the line and
   replaces all <TAB> characters with <SPACE>.  LINE is modified in
   place.  Returns the length of the modified line.  */
static int
clean_line (char *line)
{
  int len = strlen (line);

  /* LEN is re-checked before every look at line[len - 1]: a line that
     consists of just "\n" is empty after the first strip and must not
     be indexed at -1.  */
  if (len > 0 && line[len - 1] == '\n')
    line[--len] = '\0';
  if (len > 0 && line[len - 1] == '\r')
    line[--len] = '\0';
  for (; *line; line++)
    if (*line == '\t')
      *line = ' ';
  return len;
}
84 /* Convert the Un*x-ish style directory listing stored in FILE to a
85 linked list of fileinfo (system-independent) entries. The contents
86 of FILE are considered to be produced by the standard Unix `ls -la'
87 output (whatever that might be). BSD (no group) and SYSV (with
88 group) listings are handled.
90 The time stamps are stored in a separate variable, time_t
91 compatible (I hope). The timezones are ignored. */
92 static struct fileinfo *
93 ftp_parse_unix_ls (const char *file, int ignore_perms)
96 static const char *months[] = {
97 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
98 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
100 int next, len, i, error, ignore;
101 int year, month, day; /* for time analysis */
103 struct tm timestruct, *tnow;
106 char *line, *tok, *ptok; /* tokenizer */
107 struct fileinfo *dir, *l, cur; /* list creation */
109 fp = fopen (file, "rb");
112 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
117 /* Line loop to end of file: */
118 while ((line = read_whole_line (fp)) != NULL)
120 len = clean_line (line);
121 /* Skip if total... */
122 if (!strncasecmp (line, "total", 5))
127 /* Get the first token (permissions). */
128 tok = strtok (line, " ");
138 /* Decide whether we deal with a file or a directory. */
142 cur.type = FT_PLAINFILE;
143 DEBUGP (("PLAINFILE; "));
146 cur.type = FT_DIRECTORY;
147 DEBUGP (("DIRECTORY; "));
150 cur.type = FT_SYMLINK;
151 DEBUGP (("SYMLINK; "));
154 cur.type = FT_UNKNOWN;
155 DEBUGP (("UNKNOWN; "));
170 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
173 DEBUGP (("implicit perms %0o; ", cur.perms));
177 cur.perms = symperms (tok + 1);
178 DEBUGP (("perms %0o; ", cur.perms));
181 error = ignore = 0; /* Erroneous and ignoring entries are
182 treated equally for now. */
183 year = hour = min = sec = 0; /* Silence the compiler. */
186 /* While there are tokens on the line, parse them. Next is the
187 number of tokens left until the filename.
189 Use the month-name token as the "anchor" (the place where the
190 position wrt the file name is "known"). When a month name is
191 encountered, `next' is set to 5. Also, the preceding
192 characters are parsed to get the file size.
194 This tactic is quite dubious when it comes to
195 internationalization issues (non-English month names), but it
199 (tok = strtok (NULL, " ")) != NULL)
202 if (next < 0) /* a month name was not encountered */
204 for (i = 0; i < 12; i++)
205 if (!strcmp (tok, months[i]))
207 /* If we got a month, it means the token before it is the
208 size, and the filename is three tokens away. */
213 /* Parse the previous token with str_to_wgint. */
216 /* Something has gone wrong during parsing. */
221 size = str_to_wgint (ptok, NULL, 10);
222 if (size == WGINT_MAX && errno == ERANGE)
223 /* Out of range -- ignore the size. #### Should
224 we refuse to start the download. */
228 DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
232 DEBUGP (("month: %s; ", months[month]));
235 else if (next == 4) /* days */
237 if (tok[1]) /* two-digit... */
238 day = 10 * (*tok - '0') + tok[1] - '0';
239 else /* ...or one-digit */
241 DEBUGP (("day: %d; ", day));
245 /* This ought to be either the time, or the year. Let's
248 If we have a number x, it's a year. If we have x:y,
249 it's hours and minutes. If we have x:y:z, z are
252 min = hour = sec = 0;
253 /* We must deal with digits. */
254 if (c_isdigit (*tok))
256 /* Suppose it's year. */
257 for (; c_isdigit (*tok); tok++)
258 year = (*tok - '0') + 10 * year;
261 /* This means these were hours! */
265 /* Get the minutes... */
266 for (; c_isdigit (*tok); tok++)
267 min = (*tok - '0') + 10 * min;
270 /* ...and the seconds. */
272 for (; c_isdigit (*tok); tok++)
273 sec = (*tok - '0') + 10 * sec;
278 DEBUGP (("year: %d (no tm); ", year));
280 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
282 else if (next == 2) /* The file name */
287 /* Since the file name may contain a SPC, it is possible
288 for strtok to handle it wrong. */
289 fnlen = strlen (tok);
290 if (fnlen < len - (tok - line))
292 /* So we have a SPC in the file name. Restore the
295 /* If the file is a symbolic link, it should have a
297 if (cur.type == FT_SYMLINK)
299 p = strstr (tok, " -> ");
305 cur.linkto = xstrdup (p + 4);
306 DEBUGP (("link to: %s\n", cur.linkto));
307 /* And separate it from the file name. */
311 /* If we have the filename, add it to the list of files or
313 /* "." and ".." are an exception! */
314 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
316 DEBUGP (("\nIgnoring `.' and `..'; "));
320 /* Some FTP sites choose to have ls -F as their default
321 LIST output, which marks the symlinks with a trailing
322 `@', directory names with a trailing `/' and
323 executables with a trailing `*'. This is no problem
324 unless encountering a symbolic link ending with `@',
325 or an executable ending with `*' on a server without
326 default -F output. I believe these cases are very
328 fnlen = strlen (tok); /* re-calculate `fnlen' */
329 cur.name = xmalloc (fnlen + 1);
330 memcpy (cur.name, tok, fnlen + 1);
333 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
335 cur.name[fnlen - 1] = '\0';
336 DEBUGP (("trailing `/' on dir.\n"));
338 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
340 cur.name[fnlen - 1] = '\0';
341 DEBUGP (("trailing `@' on link.\n"));
343 else if (cur.type == FT_PLAINFILE
344 && (cur.perms & 0111)
345 && cur.name[fnlen - 1] == '*')
347 cur.name[fnlen - 1] = '\0';
348 DEBUGP (("trailing `*' on exec.\n"));
359 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
362 DEBUGP (("%s\n", cur.name ? cur.name : ""));
366 DEBUGP (("Skipping.\n"));
367 xfree_null (cur.name);
368 xfree_null (cur.linkto);
375 l = dir = xnew (struct fileinfo);
376 memcpy (l, &cur, sizeof (cur));
377 l->prev = l->next = NULL;
382 l->next = xnew (struct fileinfo);
384 memcpy (l, &cur, sizeof (cur));
387 /* Get the current time. */
388 timenow = time (NULL);
389 tnow = localtime (&timenow);
390 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
391 timestruct.tm_sec = sec;
392 timestruct.tm_min = min;
393 timestruct.tm_hour = hour;
394 timestruct.tm_mday = day;
395 timestruct.tm_mon = month;
398 /* Some listings will not specify the year if it is "obvious"
399 that the file was from the previous year. E.g. if today
400 is 97-01-12, and you see a file of Dec 15th, its year is
401 1996, not 1997. Thanks to Vladimir Volovich for
403 if (month > tnow->tm_mon)
404 timestruct.tm_year = tnow->tm_year - 1;
406 timestruct.tm_year = tnow->tm_year;
409 timestruct.tm_year = year;
410 if (timestruct.tm_year >= 1900)
411 timestruct.tm_year -= 1900;
412 timestruct.tm_wday = 0;
413 timestruct.tm_yday = 0;
414 timestruct.tm_isdst = -1;
415 l->tstamp = mktime (×truct); /* store the time-stamp */
424 static struct fileinfo *
425 ftp_parse_winnt_ls (const char *file)
429 int year, month, day; /* for time analysis */
431 struct tm timestruct;
433 char *line, *tok; /* tokenizer */
434 struct fileinfo *dir, *l, cur; /* list creation */
436 fp = fopen (file, "rb");
439 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
444 /* Line loop to end of file: */
445 while ((line = read_whole_line (fp)) != NULL)
447 len = clean_line (line);
449 /* Extracting name is a bit of black magic and we have to do it
450 before `strtok' inserted extra \0 characters in the line
451 string. For the moment let us just suppose that the name starts at
452 column 39 of the listing. This way we could also recognize
453 filenames that begin with a series of space characters (but who
454 really wants to use such filenames anyway?). */
455 if (len < 40) continue;
457 cur.name = xstrdup(tok);
458 DEBUGP(("Name: '%s'\n", cur.name));
460 /* First column: mm-dd-yy. Should atoi() on the month fail, january
462 tok = strtok(line, "-");
463 if (tok == NULL) continue;
464 month = atoi(tok) - 1;
465 if (month < 0) month = 0;
466 tok = strtok(NULL, "-");
467 if (tok == NULL) continue;
469 tok = strtok(NULL, " ");
470 if (tok == NULL) continue;
472 /* Assuming the epoch starting at 1.1.1970 */
473 if (year <= 70) year += 100;
475 /* Second column: hh:mm[AP]M, listing does not contain value for
477 tok = strtok(NULL, ":");
478 if (tok == NULL) continue;
480 tok = strtok(NULL, "M");
481 if (tok == NULL) continue;
483 /* Adjust hour from AM/PM. Just for the record, the sequence goes
484 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
486 if (hour == 12) hour = 0;
487 if (*tok == 'P') hour += 12;
489 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
490 year+1900, month, day, hour, min));
492 /* Build the time-stamp (copy & paste from above) */
493 timestruct.tm_sec = 0;
494 timestruct.tm_min = min;
495 timestruct.tm_hour = hour;
496 timestruct.tm_mday = day;
497 timestruct.tm_mon = month;
498 timestruct.tm_year = year;
499 timestruct.tm_wday = 0;
500 timestruct.tm_yday = 0;
501 timestruct.tm_isdst = -1;
502 cur.tstamp = mktime (×truct); /* store the time-stamp */
504 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
506 /* Third column: Either file length, or <DIR>. We also set the
507 permissions (guessed as 0644 for plain files and 0755 for
508 directories as the listing does not give us a clue) and filetype
510 tok = strtok(NULL, " ");
511 if (tok == NULL) continue;
512 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
513 if (tok == NULL) continue;
516 cur.type = FT_DIRECTORY;
519 DEBUGP(("Directory\n"));
524 cur.type = FT_PLAINFILE;
526 size = str_to_wgint (tok, NULL, 10);
527 if (size == WGINT_MAX && errno == ERANGE)
528 cur.size = 0; /* overflow */
532 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
537 /* And put everything into the linked list */
540 l = dir = xnew (struct fileinfo);
541 memcpy (l, &cur, sizeof (cur));
542 l->prev = l->next = NULL;
547 l->next = xnew (struct fileinfo);
549 memcpy (l, &cur, sizeof (cur));
/* Converts VMS symbolic permissions to number-style ones, e.g. string
   RWED,RWE,RE to 755.  "D" (delete) is taken to be equal to "W"
   (write).  Every `,' starts the next octal digit.  Characters other
   than R, W, E, D and `,' are reported through DEBUGP and otherwise
   ignored.  An empty string yields 0.
   Inspired by a patch of Stoyan Lekov <lekov@eda.bg>.  */
static int
vmsperms (const char *s)
{
  int perms = 0;

  /* Test *s before using it, so that an empty string is never
     stepped past its terminator.  */
  for (; *s; s++)
    switch (*s)
      {
      case ',': perms <<= 3; break;
      case 'R': perms  |= 4; break;
      case 'W': perms  |= 2; break;
      case 'D': perms  |= 2; break;
      case 'E': perms  |= 1; break;
      default:  DEBUGP(("wrong VMS permissons!\n"));
      }
  return perms;
}
584 static struct fileinfo *
585 ftp_parse_vms_ls (const char *file)
588 /* #### A third copy of more-or-less the same array ? */
589 static const char *months[] = {
590 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
591 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
594 int year, month, day; /* for time analysis */
596 struct tm timestruct;
598 char *line, *tok; /* tokenizer */
599 struct fileinfo *dir, *l, cur; /* list creation */
601 fp = fopen (file, "rb");
604 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
609 /* Skip empty line. */
610 line = read_whole_line (fp);
613 /* Skip "Directory PUB$DEVICE[PUB]" */
614 line = read_whole_line (fp);
617 /* Skip empty line. */
618 line = read_whole_line (fp);
621 /* Line loop to end of file: */
622 while ((line = read_whole_line (fp)) != NULL)
625 i = clean_line (line);
632 /* First column: Name. A bit of black magic again. The name my be
633 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
634 line. Therefore we will first try to get the complete name
635 until the first space character; if it fails, we assume that the name
636 occupies the whole line. After that we search for the version
637 separator ";", we remove it and check the extension of the file;
638 extension .DIR denotes directory. */
640 tok = strtok(line, " ");
641 if (tok == NULL) tok = line;
642 DEBUGP(("file name: '%s'\n", tok));
643 for (p = tok ; *p && *p != ';' ; p++)
645 if (*p == ';') *p = '\0';
646 p = tok + strlen(tok) - 4;
647 if (!strcmp(p, ".DIR")) *p = '\0';
648 cur.name = xstrdup(tok);
649 DEBUGP(("Name: '%s'\n", cur.name));
651 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
652 the file size to zero as the listing does tell us only the size in
653 filesystem blocks - for an integrity check (when mirroring, for
654 example) we would need the size in bytes. */
658 cur.type = FT_DIRECTORY;
660 DEBUGP(("Directory\n"));
664 cur.type = FT_PLAINFILE;
670 /* Second column, if exists, or the first column of the next line
671 contain file size in blocks. We will skip it. */
673 tok = strtok(NULL, " ");
676 DEBUGP(("Getting additional line\n"));
678 line = read_whole_line (fp);
681 DEBUGP(("empty line read, leaving listing parser\n"));
684 i = clean_line (line);
687 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
691 tok = strtok(line, " ");
693 DEBUGP(("second token: '%s'\n", tok));
695 /* Third/Second column: Date DD-MMM-YYYY. */
697 tok = strtok(NULL, "-");
698 if (tok == NULL) continue;
699 DEBUGP(("day: '%s'\n",tok));
701 tok = strtok(NULL, "-");
704 /* If the server produces garbage like
705 'EA95_0PS.GZ;1 No privilege for attempted operation'
706 the first strtok(NULL, "-") will return everything until the end
707 of the line and only the next strtok() call will return NULL. */
708 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
712 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
713 /* Uknown months are mapped to January */
715 tok = strtok (NULL, " ");
716 if (tok == NULL) continue;
717 year = atoi (tok) - 1900;
718 DEBUGP(("date parsed\n"));
720 /* Fourth/Third column: Time hh:mm[:ss] */
721 tok = strtok (NULL, " ");
722 if (tok == NULL) continue;
726 for (; *p && *p != ':'; ++p)
730 for (; *p && *p != ':'; ++p)
735 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
736 year+1900, month, day, hour, min, sec));
738 /* Build the time-stamp (copy & paste from above) */
739 timestruct.tm_sec = sec;
740 timestruct.tm_min = min;
741 timestruct.tm_hour = hour;
742 timestruct.tm_mday = day;
743 timestruct.tm_mon = month;
744 timestruct.tm_year = year;
745 timestruct.tm_wday = 0;
746 timestruct.tm_yday = 0;
747 timestruct.tm_isdst = -1;
748 cur.tstamp = mktime (×truct); /* store the time-stamp */
750 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
752 /* Skip the fifth column */
754 tok = strtok(NULL, " ");
755 if (tok == NULL) continue;
757 /* Sixth column: Permissions */
759 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissons */
760 if (tok == NULL) continue;
761 tok = strtok(NULL, ")");
764 DEBUGP(("confusing VMS permissions, skipping line\n"));
768 /* Permissons have the format "RWED,RWED,RE" */
769 cur.perms = vmsperms(tok);
770 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
774 /* And put everything into the linked list */
777 l = dir = xnew (struct fileinfo);
778 memcpy (l, &cur, sizeof (cur));
779 l->prev = l->next = NULL;
784 l->next = xnew (struct fileinfo);
786 memcpy (l, &cur, sizeof (cur));
798 /* This function switches between the correct parsing routine depending on
799 the SYSTEM_TYPE. The system type should be based on the result of the
800 "SYST" response of the FTP server. According to this repsonse we will
801 use on of the three different listing parsers that cover the most of FTP
802 servers used nowadays. */
805 ftp_parse_ls (const char *file, const enum stype system_type)
810 return ftp_parse_unix_ls (file, 0);
813 /* Detect whether the listing is simulating the UNIX format */
816 fp = fopen (file, "rb");
819 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
824 /* If the first character of the file is '0'-'9', it's WINNT
826 if (c >= '0' && c <='9')
827 return ftp_parse_winnt_ls (file);
829 return ftp_parse_unix_ls (file, 1);
832 return ftp_parse_vms_ls (file);
834 return ftp_parse_unix_ls (file, 1);
836 logprintf (LOG_NOTQUIET, _("\
837 Unsupported listing type, trying Unix listing parser.\n"));
838 return ftp_parse_unix_ls (file, 0);
842 /* Stuff for creating FTP index. */
844 /* The function creates an HTML index containing references to given
845 directories and files on the appropriate host. The references are
848 ftp_index (const char *file, struct url *u, struct fileinfo *f)
852 char *htclfile; /* HTML-clean file name */
856 fp = fopen (file, "wb");
859 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
867 char *tmpu, *tmpp; /* temporary, clean user and passwd */
869 tmpu = url_escape (u->user);
870 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
872 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
874 upwd = concat_strings (tmpu, "@", (char *) 0);
880 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
881 fprintf (fp, "<html>\n<head>\n<title>");
882 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
883 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
884 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
885 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
891 /* #### Should we translate the months? Or, even better, use
893 static const char *months[] = {
894 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
895 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
897 struct tm *ptm = localtime ((time_t *)&f->tstamp);
899 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
902 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
907 fprintf (fp, _("time unknown "));
911 fprintf (fp, _("File "));
914 fprintf (fp, _("Directory "));
917 fprintf (fp, _("Link "));
920 fprintf (fp, _("Not sure "));
923 htclfile = html_quote_string (f->name);
924 fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
927 fprintf (fp, "%s", u->dir);
930 fprintf (fp, "%s", htclfile);
931 if (f->type == FT_DIRECTORY)
933 fprintf (fp, "\">%s", htclfile);
934 if (f->type == FT_DIRECTORY)
936 fprintf (fp, "</a> ");
937 if (f->type == FT_PLAINFILE)
938 fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
939 else if (f->type == FT_SYMLINK)
940 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
945 fprintf (fp, "</pre>\n</body>\n</html>\n");