1 /* Parsing FTP `ls' output.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
43 #include "convert.h" /* for html_quote_string prototype */
44 #include "retr.h" /* for output_stream */
/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.  S points at the nine permission characters,
   i.e. an `ls -l' mode field without its leading file-type
   character.  Returns 0 when fewer than nine characters are
   available.

   Only the rwx bits are produced.  A lower-case `s' (setuid/setgid
   with execute) or `t' (sticky with execute) in an execute position
   counts as execute; the special bits themselves are not reported.
   ACLs are ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  /* Refuse short strings so that the loop below never reads past the
     terminating NUL.  */
  if (strlen (s) < 9)
    return 0;
  for (i = 0; i < 3; i++, s += 3)
    {
      /* One octal digit per user/group/other triad.  */
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's' || s[2] == 't'));
    }
  return perms;
}
/* Cleans a line of text so that it can be consistently parsed.  Removes
   a trailing <LF> and then a trailing <CR>, in case they occur at the
   end of the line, and replaces every <TAB> character with <SPACE>.
   LINE is modified in place.  Returns the length of the resulting
   line.  */
static int
clean_line (char *line)
{
  int len = strlen (line);

  /* Each test checks LEN first: an empty line, or a line consisting
     only of "\n", must not make us look at line[-1].  */
  if (len > 0 && line[len - 1] == '\n')
    line[--len] = '\0';
  if (len > 0 && line[len - 1] == '\r')
    line[--len] = '\0';

  for (; *line; line++)
    if (*line == '\t')
      *line = ' ';

  return len;
}
83 /* Convert the Un*x-ish style directory listing stored in FILE to a
84 linked list of fileinfo (system-independent) entries. The contents
85 of FILE are considered to be produced by the standard Unix `ls -la'
86 output (whatever that might be). BSD (no group) and SYSV (with
87 group) listings are handled.
89 The time stamps are stored in a separate variable, time_t
90 compatible (I hope). The timezones are ignored. */
91 static struct fileinfo *
92 ftp_parse_unix_ls (const char *file, int ignore_perms)
95 static const char *months[] = {
96 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
97 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
99 int next, len, i, error, ignore;
100 int year, month, day; /* for time analysis */
102 struct tm timestruct, *tnow;
105 char *line, *tok, *ptok; /* tokenizer */
106 struct fileinfo *dir, *l, cur; /* list creation */
108 fp = fopen (file, "rb");
111 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
116 /* Line loop to end of file: */
117 while ((line = read_whole_line (fp)) != NULL)
119 len = clean_line (line);
120 /* Skip if total... */
121 if (!strncasecmp (line, "total", 5))
126 /* Get the first token (permissions). */
127 tok = strtok (line, " ");
137 /* Decide whether we deal with a file or a directory. */
141 cur.type = FT_PLAINFILE;
142 DEBUGP (("PLAINFILE; "));
145 cur.type = FT_DIRECTORY;
146 DEBUGP (("DIRECTORY; "));
149 cur.type = FT_SYMLINK;
150 DEBUGP (("SYMLINK; "));
153 cur.type = FT_UNKNOWN;
154 DEBUGP (("UNKNOWN; "));
169 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
172 DEBUGP (("implicit perms %0o; ", cur.perms));
176 cur.perms = symperms (tok + 1);
177 DEBUGP (("perms %0o; ", cur.perms));
180 error = ignore = 0; /* Erroneous and ignoring entries are
181 treated equally for now. */
182 year = hour = min = sec = 0; /* Silence the compiler. */
185 /* While there are tokens on the line, parse them. Next is the
186 number of tokens left until the filename.
188 Use the month-name token as the "anchor" (the place where the
189 position wrt the file name is "known"). When a month name is
190 encountered, `next' is set to 5. Also, the preceding
191 characters are parsed to get the file size.
193 This tactic is quite dubious when it comes to
194 internationalization issues (non-English month names), but it
198 (tok = strtok (NULL, " ")) != NULL)
201 if (next < 0) /* a month name was not encountered */
203 for (i = 0; i < 12; i++)
204 if (!strcmp (tok, months[i]))
206 /* If we got a month, it means the token before it is the
207 size, and the filename is three tokens away. */
212 /* Parse the previous token with str_to_wgint. */
215 /* Something has gone wrong during parsing. */
220 size = str_to_wgint (ptok, NULL, 10);
221 if (size == WGINT_MAX && errno == ERANGE)
222 /* Out of range -- ignore the size. #### Should
223 we refuse to start the download. */
227 DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
231 DEBUGP (("month: %s; ", months[month]));
234 else if (next == 4) /* days */
236 if (tok[1]) /* two-digit... */
237 day = 10 * (*tok - '0') + tok[1] - '0';
238 else /* ...or one-digit */
240 DEBUGP (("day: %d; ", day));
244 /* This ought to be either the time, or the year. Let's
247 If we have a number x, it's a year. If we have x:y,
248 it's hours and minutes. If we have x:y:z, z are
251 min = hour = sec = 0;
252 /* We must deal with digits. */
253 if (c_isdigit (*tok))
255 /* Suppose it's year. */
256 for (; c_isdigit (*tok); tok++)
257 year = (*tok - '0') + 10 * year;
260 /* This means these were hours! */
264 /* Get the minutes... */
265 for (; c_isdigit (*tok); tok++)
266 min = (*tok - '0') + 10 * min;
269 /* ...and the seconds. */
271 for (; c_isdigit (*tok); tok++)
272 sec = (*tok - '0') + 10 * sec;
277 DEBUGP (("year: %d (no tm); ", year));
279 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
281 else if (next == 2) /* The file name */
286 /* Since the file name may contain a SPC, it is possible
287 for strtok to handle it wrong. */
288 fnlen = strlen (tok);
289 if (fnlen < len - (tok - line))
291 /* So we have a SPC in the file name. Restore the
294 /* If the file is a symbolic link, it should have a
296 if (cur.type == FT_SYMLINK)
298 p = strstr (tok, " -> ");
304 cur.linkto = xstrdup (p + 4);
305 DEBUGP (("link to: %s\n", cur.linkto));
306 /* And separate it from the file name. */
310 /* If we have the filename, add it to the list of files or
312 /* "." and ".." are an exception! */
313 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
315 DEBUGP (("\nIgnoring `.' and `..'; "));
319 /* Some FTP sites choose to have ls -F as their default
320 LIST output, which marks the symlinks with a trailing
321 `@', directory names with a trailing `/' and
322 executables with a trailing `*'. This is no problem
323 unless encountering a symbolic link ending with `@',
324 or an executable ending with `*' on a server without
325 default -F output. I believe these cases are very
327 fnlen = strlen (tok); /* re-calculate `fnlen' */
328 cur.name = xmalloc (fnlen + 1);
329 memcpy (cur.name, tok, fnlen + 1);
332 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
334 cur.name[fnlen - 1] = '\0';
335 DEBUGP (("trailing `/' on dir.\n"));
337 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
339 cur.name[fnlen - 1] = '\0';
340 DEBUGP (("trailing `@' on link.\n"));
342 else if (cur.type == FT_PLAINFILE
343 && (cur.perms & 0111)
344 && cur.name[fnlen - 1] == '*')
346 cur.name[fnlen - 1] = '\0';
347 DEBUGP (("trailing `*' on exec.\n"));
358 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
361 DEBUGP (("%s\n", cur.name ? cur.name : ""));
365 DEBUGP (("Skipping.\n"));
366 xfree_null (cur.name);
367 xfree_null (cur.linkto);
374 l = dir = xnew (struct fileinfo);
375 memcpy (l, &cur, sizeof (cur));
376 l->prev = l->next = NULL;
381 l->next = xnew (struct fileinfo);
383 memcpy (l, &cur, sizeof (cur));
386 /* Get the current time. */
387 timenow = time (NULL);
388 tnow = localtime (&timenow);
389 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
390 timestruct.tm_sec = sec;
391 timestruct.tm_min = min;
392 timestruct.tm_hour = hour;
393 timestruct.tm_mday = day;
394 timestruct.tm_mon = month;
397 /* Some listings will not specify the year if it is "obvious"
398 that the file was from the previous year. E.g. if today
399 is 97-01-12, and you see a file of Dec 15th, its year is
400 1996, not 1997. Thanks to Vladimir Volovich for
402 if (month > tnow->tm_mon)
403 timestruct.tm_year = tnow->tm_year - 1;
405 timestruct.tm_year = tnow->tm_year;
408 timestruct.tm_year = year;
409 if (timestruct.tm_year >= 1900)
410 timestruct.tm_year -= 1900;
411 timestruct.tm_wday = 0;
412 timestruct.tm_yday = 0;
413 timestruct.tm_isdst = -1;
414 l->tstamp = mktime (×truct); /* store the time-stamp */
423 static struct fileinfo *
424 ftp_parse_winnt_ls (const char *file)
428 int year, month, day; /* for time analysis */
430 struct tm timestruct;
432 char *line, *tok; /* tokenizer */
433 struct fileinfo *dir, *l, cur; /* list creation */
435 fp = fopen (file, "rb");
438 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
443 /* Line loop to end of file: */
444 while ((line = read_whole_line (fp)) != NULL)
446 len = clean_line (line);
448 /* Extracting name is a bit of black magic and we have to do it
449 before `strtok' inserted extra \0 characters in the line
450 string. For the moment let us just suppose that the name starts at
451 column 39 of the listing. This way we could also recognize
452 filenames that begin with a series of space characters (but who
453 really wants to use such filenames anyway?). */
454 if (len < 40) continue;
456 cur.name = xstrdup(tok);
457 DEBUGP(("Name: '%s'\n", cur.name));
459 /* First column: mm-dd-yy. Should atoi() on the month fail, january
461 tok = strtok(line, "-");
462 if (tok == NULL) continue;
463 month = atoi(tok) - 1;
464 if (month < 0) month = 0;
465 tok = strtok(NULL, "-");
466 if (tok == NULL) continue;
468 tok = strtok(NULL, " ");
469 if (tok == NULL) continue;
471 /* Assuming the epoch starting at 1.1.1970 */
472 if (year <= 70) year += 100;
474 /* Second column: hh:mm[AP]M, listing does not contain value for
476 tok = strtok(NULL, ":");
477 if (tok == NULL) continue;
479 tok = strtok(NULL, "M");
480 if (tok == NULL) continue;
482 /* Adjust hour from AM/PM. Just for the record, the sequence goes
483 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
485 if (hour == 12) hour = 0;
486 if (*tok == 'P') hour += 12;
488 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
489 year+1900, month, day, hour, min));
491 /* Build the time-stamp (copy & paste from above) */
492 timestruct.tm_sec = 0;
493 timestruct.tm_min = min;
494 timestruct.tm_hour = hour;
495 timestruct.tm_mday = day;
496 timestruct.tm_mon = month;
497 timestruct.tm_year = year;
498 timestruct.tm_wday = 0;
499 timestruct.tm_yday = 0;
500 timestruct.tm_isdst = -1;
501 cur.tstamp = mktime (×truct); /* store the time-stamp */
503 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
505 /* Third column: Either file length, or <DIR>. We also set the
506 permissions (guessed as 0644 for plain files and 0755 for
507 directories as the listing does not give us a clue) and filetype
509 tok = strtok(NULL, " ");
510 if (tok == NULL) continue;
511 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
512 if (tok == NULL) continue;
515 cur.type = FT_DIRECTORY;
518 DEBUGP(("Directory\n"));
523 cur.type = FT_PLAINFILE;
525 size = str_to_wgint (tok, NULL, 10);
526 if (size == WGINT_MAX && errno == ERANGE)
527 cur.size = 0; /* overflow */
531 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
536 /* And put everything into the linked list */
539 l = dir = xnew (struct fileinfo);
540 memcpy (l, &cur, sizeof (cur));
541 l->prev = l->next = NULL;
546 l->next = xnew (struct fileinfo);
548 memcpy (l, &cur, sizeof (cur));
/* Converts VMS symbolic permissions to number-style ones, e.g. string
   RWED,RWE,RE to 755.  "D" (delete) is taken to be equal to "W"
   (write).  Each comma starts the next octal digit.  Any other
   character is reported through DEBUGP and otherwise ignored.  An
   empty string yields 0.  Inspired by a patch of Stoyan Lekov
   <lekov@eda.bg>.  */
static int
vmsperms (const char *s)
{
  int perms = 0;

  /* Test *s before using it, so that an empty string is never read
     past its terminating NUL.  */
  for (; *s; s++)
    {
      switch (*s)
        {
        case ',': perms <<= 3; break;
        case 'R': perms  |= 4; break;
        case 'W': perms  |= 2; break;
        case 'D': perms  |= 2; break;
        case 'E': perms  |= 1; break;
        default:
          DEBUGP(("wrong VMS permissons!\n"));
          break;
        }
    }
  return perms;
}
583 static struct fileinfo *
584 ftp_parse_vms_ls (const char *file)
587 /* #### A third copy of more-or-less the same array ? */
588 static const char *months[] = {
589 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
590 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
593 int year, month, day; /* for time analysis */
595 struct tm timestruct;
597 char *line, *tok; /* tokenizer */
598 struct fileinfo *dir, *l, cur; /* list creation */
600 fp = fopen (file, "rb");
603 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
608 /* Skip empty line. */
609 line = read_whole_line (fp);
612 /* Skip "Directory PUB$DEVICE[PUB]" */
613 line = read_whole_line (fp);
616 /* Skip empty line. */
617 line = read_whole_line (fp);
620 /* Line loop to end of file: */
621 while ((line = read_whole_line (fp)) != NULL)
624 i = clean_line (line);
631 /* First column: Name. A bit of black magic again. The name my be
632 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
633 line. Therefore we will first try to get the complete name
634 until the first space character; if it fails, we assume that the name
635 occupies the whole line. After that we search for the version
636 separator ";", we remove it and check the extension of the file;
637 extension .DIR denotes directory. */
639 tok = strtok(line, " ");
640 if (tok == NULL) tok = line;
641 DEBUGP(("file name: '%s'\n", tok));
642 for (p = tok ; *p && *p != ';' ; p++)
644 if (*p == ';') *p = '\0';
645 p = tok + strlen(tok) - 4;
646 if (!strcmp(p, ".DIR")) *p = '\0';
647 cur.name = xstrdup(tok);
648 DEBUGP(("Name: '%s'\n", cur.name));
650 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
651 the file size to zero as the listing does tell us only the size in
652 filesystem blocks - for an integrity check (when mirroring, for
653 example) we would need the size in bytes. */
657 cur.type = FT_DIRECTORY;
659 DEBUGP(("Directory\n"));
663 cur.type = FT_PLAINFILE;
669 /* Second column, if exists, or the first column of the next line
670 contain file size in blocks. We will skip it. */
672 tok = strtok(NULL, " ");
675 DEBUGP(("Getting additional line\n"));
677 line = read_whole_line (fp);
680 DEBUGP(("empty line read, leaving listing parser\n"));
683 i = clean_line (line);
686 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
690 tok = strtok(line, " ");
692 DEBUGP(("second token: '%s'\n", tok));
694 /* Third/Second column: Date DD-MMM-YYYY. */
696 tok = strtok(NULL, "-");
697 if (tok == NULL) continue;
698 DEBUGP(("day: '%s'\n",tok));
700 tok = strtok(NULL, "-");
703 /* If the server produces garbage like
704 'EA95_0PS.GZ;1 No privilege for attempted operation'
705 the first strtok(NULL, "-") will return everything until the end
706 of the line and only the next strtok() call will return NULL. */
707 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
711 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
712 /* Uknown months are mapped to January */
714 tok = strtok (NULL, " ");
715 if (tok == NULL) continue;
716 year = atoi (tok) - 1900;
717 DEBUGP(("date parsed\n"));
719 /* Fourth/Third column: Time hh:mm[:ss] */
720 tok = strtok (NULL, " ");
721 if (tok == NULL) continue;
725 for (; *p && *p != ':'; ++p)
729 for (; *p && *p != ':'; ++p)
734 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
735 year+1900, month, day, hour, min, sec));
737 /* Build the time-stamp (copy & paste from above) */
738 timestruct.tm_sec = sec;
739 timestruct.tm_min = min;
740 timestruct.tm_hour = hour;
741 timestruct.tm_mday = day;
742 timestruct.tm_mon = month;
743 timestruct.tm_year = year;
744 timestruct.tm_wday = 0;
745 timestruct.tm_yday = 0;
746 timestruct.tm_isdst = -1;
747 cur.tstamp = mktime (×truct); /* store the time-stamp */
749 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
751 /* Skip the fifth column */
753 tok = strtok(NULL, " ");
754 if (tok == NULL) continue;
756 /* Sixth column: Permissions */
758 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissons */
759 if (tok == NULL) continue;
760 tok = strtok(NULL, ")");
763 DEBUGP(("confusing VMS permissions, skipping line\n"));
767 /* Permissons have the format "RWED,RWED,RE" */
768 cur.perms = vmsperms(tok);
769 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
773 /* And put everything into the linked list */
776 l = dir = xnew (struct fileinfo);
777 memcpy (l, &cur, sizeof (cur));
778 l->prev = l->next = NULL;
783 l->next = xnew (struct fileinfo);
785 memcpy (l, &cur, sizeof (cur));
797 /* This function switches between the correct parsing routine depending on
798 the SYSTEM_TYPE. The system type should be based on the result of the
799 "SYST" response of the FTP server. According to this repsonse we will
800 use on of the three different listing parsers that cover the most of FTP
801 servers used nowadays. */
804 ftp_parse_ls (const char *file, const enum stype system_type)
809 return ftp_parse_unix_ls (file, 0);
812 /* Detect whether the listing is simulating the UNIX format */
815 fp = fopen (file, "rb");
818 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
823 /* If the first character of the file is '0'-'9', it's WINNT
825 if (c >= '0' && c <='9')
826 return ftp_parse_winnt_ls (file);
828 return ftp_parse_unix_ls (file, 1);
831 return ftp_parse_vms_ls (file);
833 return ftp_parse_unix_ls (file, 1);
835 logprintf (LOG_NOTQUIET, _("\
836 Unsupported listing type, trying Unix listing parser.\n"));
837 return ftp_parse_unix_ls (file, 0);
841 /* Stuff for creating FTP index. */
843 /* The function creates an HTML index containing references to given
844 directories and files on the appropriate host. The references are
847 ftp_index (const char *file, struct url *u, struct fileinfo *f)
851 char *htclfile; /* HTML-clean file name */
855 fp = fopen (file, "wb");
858 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
866 char *tmpu, *tmpp; /* temporary, clean user and passwd */
868 tmpu = url_escape (u->user);
869 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
871 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
873 upwd = concat_strings (tmpu, "@", (char *) 0);
879 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
880 fprintf (fp, "<html>\n<head>\n<title>");
881 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
882 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
883 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
884 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
890 /* #### Should we translate the months? Or, even better, use
892 static const char *months[] = {
893 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
894 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
896 struct tm *ptm = localtime ((time_t *)&f->tstamp);
898 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
901 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
906 fprintf (fp, _("time unknown "));
910 fprintf (fp, _("File "));
913 fprintf (fp, _("Directory "));
916 fprintf (fp, _("Link "));
919 fprintf (fp, _("Not sure "));
922 htclfile = html_quote_string (f->name);
923 fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
926 fprintf (fp, "%s", u->dir);
929 fprintf (fp, "%s", htclfile);
930 if (f->type == FT_DIRECTORY)
932 fprintf (fp, "\">%s", htclfile);
933 if (f->type == FT_DIRECTORY)
935 fprintf (fp, "</a> ");
936 if (f->type == FT_PLAINFILE)
937 fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
938 else if (f->type == FT_SYMLINK)
939 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
944 fprintf (fp, "</pre>\n</body>\n</html>\n");