1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
43 #include <sys/types.h>
51 extern FILE *output_stream;
/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.  S must point at the nine permission characters
   (three each for owner, group and other); a shorter string yields 0.
   An `s' in an execute slot is counted as executable, but otherwise
   nothing is known of setuid/setgid/sticky.  ACLs are ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  /* Refuse truncated permission strings instead of indexing past
     their terminating NUL.  */
  if (strlen (s) < 9)
    return 0;
  for (i = 0; i < 3; i++, s += 3)
    {
      /* Make room for the next rwx triplet.  */
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's'));
    }
  return perms;
}
/* Cleans a line of text so that it can be consistently parsed.  Destroys
   <CR> and <LF> in case that they occur at the end of the line and
   replaces all <TAB> characters with <SPACE>.  LINE is modified in
   place.  Returns the length of the cleaned line.  */
static int
clean_line (char *line)
{
  int len = (int) strlen (line);

  /* Strip a trailing LF, then a trailing CR.  Each test is guarded so
     that an empty (or newline-only) line never reads line[-1].  */
  if (len > 0 && line[len - 1] == '\n')
    line[--len] = '\0';
  if (len > 0 && line[len - 1] == '\r')
    line[--len] = '\0';

  /* Tabs become spaces so that the tokenizers can split on " " only.  */
  for (; *line; line++)
    if (*line == '\t')
      *line = ' ';

  return len;
}
90 /* Convert the Un*x-ish style directory listing stored in FILE to a
91 linked list of fileinfo (system-independent) entries. The contents
92 of FILE are considered to be produced by the standard Unix `ls -la'
93 output (whatever that might be). BSD (no group) and SYSV (with
94 group) listings are handled.
96 The time stamps are stored in a separate variable, time_t
97 compatible (I hope). The timezones are ignored. */
98 static struct fileinfo *
99 ftp_parse_unix_ls (const char *file, int ignore_perms)
102 static const char *months[] = {
103 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
104 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
106 int next, len, i, error, ignore;
107 int year, month, day; /* for time analysis */
109 struct tm timestruct, *tnow;
112 char *line, *tok; /* tokenizer */
113 struct fileinfo *dir, *l, cur; /* list creation */
115 fp = fopen (file, "rb");
118 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
123 /* Line loop to end of file: */
124 while ((line = read_whole_line (fp)) != NULL)
126 len = clean_line (line);
127 /* Skip if total... */
128 if (!strncasecmp (line, "total", 5))
133 /* Get the first token (permissions). */
134 tok = strtok (line, " ");
144 /* Decide whether we deal with a file or a directory. */
148 cur.type = FT_PLAINFILE;
149 DEBUGP (("PLAINFILE; "));
152 cur.type = FT_DIRECTORY;
153 DEBUGP (("DIRECTORY; "));
156 cur.type = FT_SYMLINK;
157 DEBUGP (("SYMLINK; "));
160 cur.type = FT_UNKNOWN;
161 DEBUGP (("UNKNOWN; "));
176 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
179 DEBUGP (("implicit perms %0o; ", cur.perms));
183 cur.perms = symperms (tok + 1);
184 DEBUGP (("perms %0o; ", cur.perms));
187 error = ignore = 0; /* Erroneous and ignoring entries are
188 treated equally for now. */
189 year = hour = min = sec = 0; /* Silence the compiler. */
192 /* While there are tokens on the line, parse them. Next is the
193 number of tokens left until the filename.
195 Use the month-name token as the "anchor" (the place where the
196 position wrt the file name is "known"). When a month name is
197 encountered, `next' is set to 5. Also, the preceding
198 characters are parsed to get the file size.
200 This tactic is quite dubious when it comes to
201 internationalization issues (non-English month names), but it works for now. */
203 while ((tok = strtok (NULL, " ")) != NULL)
206 if (next < 0) /* a month name was not encountered */
208 for (i = 0; i < 12; i++)
209 if (!strcmp (tok, months[i]))
211 /* If we got a month, it means the token before it is the
212 size, and the filename is three tokens away. */
217 /* Back up to the beginning of the previous token
218 and parse it with str_to_wgint. */
220 while (t > line && ISDIGIT (*t))
224 /* Something has gone wrong during parsing. */
229 size = str_to_wgint (t, NULL, 10);
230 if (size == WGINT_MAX && errno == ERANGE)
231 /* Out of range -- ignore the size. #### Should
232 we refuse to start the download? */
239 DEBUGP (("month: %s; ", months[month]));
242 else if (next == 4) /* days */
244 if (tok[1]) /* two-digit... */
245 day = 10 * (*tok - '0') + tok[1] - '0';
246 else /* ...or one-digit */
248 DEBUGP (("day: %d; ", day));
252 /* This ought to be either the time, or the year. Let's be flexible.
255 If we have a number x, it's a year. If we have x:y,
256 it's hours and minutes. If we have x:y:z, z are seconds. */
259 min = hour = sec = 0;
260 /* We must deal with digits. */
263 /* Suppose it's year. */
264 for (; ISDIGIT (*tok); tok++)
265 year = (*tok - '0') + 10 * year;
268 /* This means these were hours! */
272 /* Get the minutes... */
273 for (; ISDIGIT (*tok); tok++)
274 min = (*tok - '0') + 10 * min;
277 /* ...and the seconds. */
279 for (; ISDIGIT (*tok); tok++)
280 sec = (*tok - '0') + 10 * sec;
285 DEBUGP (("year: %d (no tm); ", year));
287 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
289 else if (next == 2) /* The file name */
294 /* Since the file name may contain a SPC, it is possible
295 for strtok to handle it wrong. */
296 fnlen = strlen (tok);
297 if (fnlen < len - (tok - line))
299 /* So we have a SPC in the file name. Restore the original character. */
302 /* If the file is a symbolic link, it should have a ` -> ' somewhere. */
304 if (cur.type == FT_SYMLINK)
306 p = strstr (tok, " -> ");
312 cur.linkto = xstrdup (p + 4);
313 DEBUGP (("link to: %s\n", cur.linkto));
314 /* And separate it from the file name. */
318 /* If we have the filename, add it to the list of files or directories. */
320 /* "." and ".." are an exception! */
321 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
323 DEBUGP (("\nIgnoring `.' and `..'; "));
327 /* Some FTP sites choose to have ls -F as their default
328 LIST output, which marks the symlinks with a trailing
329 `@', directory names with a trailing `/' and
330 executables with a trailing `*'. This is no problem
331 unless encountering a symbolic link ending with `@',
332 or an executable ending with `*' on a server without
333 default -F output. I believe these cases are very rare. */
335 fnlen = strlen (tok); /* re-calculate `fnlen' */
336 cur.name = (char *)xmalloc (fnlen + 1);
337 memcpy (cur.name, tok, fnlen + 1);
340 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
342 cur.name[fnlen - 1] = '\0';
343 DEBUGP (("trailing `/' on dir.\n"));
345 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
347 cur.name[fnlen - 1] = '\0';
348 DEBUGP (("trailing `@' on link.\n"));
350 else if (cur.type == FT_PLAINFILE
351 && (cur.perms & 0111)
352 && cur.name[fnlen - 1] == '*')
354 cur.name[fnlen - 1] = '\0';
355 DEBUGP (("trailing `*' on exec.\n"));
366 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
373 DEBUGP (("Skipping.\n"));
374 xfree_null (cur.name);
375 xfree_null (cur.linkto);
382 l = dir = xnew (struct fileinfo);
383 memcpy (l, &cur, sizeof (cur));
384 l->prev = l->next = NULL;
389 l->next = xnew (struct fileinfo);
391 memcpy (l, &cur, sizeof (cur));
394 /* Get the current time. */
395 timenow = time (NULL);
396 tnow = localtime (&timenow);
397 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
398 timestruct.tm_sec = sec;
399 timestruct.tm_min = min;
400 timestruct.tm_hour = hour;
401 timestruct.tm_mday = day;
402 timestruct.tm_mon = month;
405 /* Some listings will not specify the year if it is "obvious"
406 that the file was from the previous year. E.g. if today
407 is 97-01-12, and you see a file of Dec 15th, its year is
408 1996, not 1997. Thanks to Vladimir Volovich for mentioning this. */
410 if (month > tnow->tm_mon)
411 timestruct.tm_year = tnow->tm_year - 1;
413 timestruct.tm_year = tnow->tm_year;
416 timestruct.tm_year = year;
417 if (timestruct.tm_year >= 1900)
418 timestruct.tm_year -= 1900;
419 timestruct.tm_wday = 0;
420 timestruct.tm_yday = 0;
421 timestruct.tm_isdst = -1;
422 l->tstamp = mktime (×truct); /* store the time-stamp */
431 static struct fileinfo *
432 ftp_parse_winnt_ls (const char *file)
436 int year, month, day; /* for time analysis */
438 struct tm timestruct;
440 char *line, *tok; /* tokenizer */
441 struct fileinfo *dir, *l, cur; /* list creation */
443 fp = fopen (file, "rb");
446 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
451 /* Line loop to end of file: */
452 while ((line = read_whole_line (fp)) != NULL)
454 len = clean_line (line);
456 /* Extracting name is a bit of black magic and we have to do it
457 before `strtok' inserted extra \0 characters in the line
458 string. For the moment let us just suppose that the name starts at
459 column 39 of the listing. This way we could also recognize
460 filenames that begin with a series of space characters (but who
461 really wants to use such filenames anyway?). */
462 if (len < 40) continue;
464 cur.name = xstrdup(tok);
465 DEBUGP(("Name: '%s'\n", cur.name));
467 /* First column: mm-dd-yy. Should atoi() on the month fail, January will be assumed. */
469 tok = strtok(line, "-");
470 if (tok == NULL) continue;
471 month = atoi(tok) - 1;
472 if (month < 0) month = 0;
473 tok = strtok(NULL, "-");
474 if (tok == NULL) continue;
476 tok = strtok(NULL, " ");
477 if (tok == NULL) continue;
479 /* Assuming the epoch starting at 1.1.1970 */
480 if (year <= 70) year += 100;
482 /* Second column: hh:mm[AP]M, listing does not contain value for seconds. */
484 tok = strtok(NULL, ":");
485 if (tok == NULL) continue;
487 tok = strtok(NULL, "M");
488 if (tok == NULL) continue;
490 /* Adjust hour from AM/PM. Just for the record, the sequence goes
491 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
493 if (hour == 12) hour = 0;
494 if (*tok == 'P') hour += 12;
496 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
497 year+1900, month, day, hour, min));
499 /* Build the time-stamp (copy & paste from above) */
500 timestruct.tm_sec = 0;
501 timestruct.tm_min = min;
502 timestruct.tm_hour = hour;
503 timestruct.tm_mday = day;
504 timestruct.tm_mon = month;
505 timestruct.tm_year = year;
506 timestruct.tm_wday = 0;
507 timestruct.tm_yday = 0;
508 timestruct.tm_isdst = -1;
509 cur.tstamp = mktime (×truct); /* store the time-stamp */
511 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
513 /* Third column: Either file length, or <DIR>. We also set the
514 permissions (guessed as 0644 for plain files and 0755 for
515 directories as the listing does not give us a clue) and filetype here. */
517 tok = strtok(NULL, " ");
518 if (tok == NULL) continue;
519 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
520 if (tok == NULL) continue;
523 cur.type = FT_DIRECTORY;
526 DEBUGP(("Directory\n"));
531 cur.type = FT_PLAINFILE;
533 size = str_to_wgint (tok, NULL, 10);
534 if (size == WGINT_MAX && errno == ERANGE)
535 cur.size = 0; /* overflow */
539 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
544 /* And put everything into the linked list */
547 l = dir = xnew (struct fileinfo);
548 memcpy (l, &cur, sizeof (cur));
549 l->prev = l->next = NULL;
554 l->next = xnew (struct fileinfo);
556 memcpy (l, &cur, sizeof (cur));
/* Converts VMS symbolic permissions to number-style ones, e.g. string
   RWED,RWE,RE to 755.  "D" (delete) is taken to be equal to "W"
   (write).  Each `,' starts the next three-bit group.  Any other
   character is reported through DEBUGP and otherwise ignored.
   Inspired by a patch of Stoyan Lekov <lekov@eda.bg>.  */
static int
vmsperms (const char *s)
{
  int perms = 0;

  /* Walk up to, but never past, the terminating NUL; an empty string
     therefore yields 0.  */
  for (; *s; s++)
    {
      switch (*s)
        {
        case ',': perms <<= 3; break;
        case 'R': perms |= 4; break;
        case 'W': perms |= 2; break;
        case 'D': perms |= 2; break;
        case 'E': perms |= 1; break;
        default: DEBUGP(("wrong VMS permissons!\n"));
        }
    }

  return perms;
}
591 static struct fileinfo *
592 ftp_parse_vms_ls (const char *file)
595 /* #### A third copy of more-or-less the same array ? */
596 static const char *months[] = {
597 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
598 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
601 int year, month, day; /* for time analysis */
603 struct tm timestruct;
605 char *line, *tok; /* tokenizer */
606 struct fileinfo *dir, *l, cur; /* list creation */
608 fp = fopen (file, "rb");
611 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
616 /* Skip empty line. */
617 line = read_whole_line (fp);
620 /* Skip "Directory PUB$DEVICE[PUB]" */
621 line = read_whole_line (fp);
624 /* Skip empty line. */
625 line = read_whole_line (fp);
628 /* Line loop to end of file: */
629 while ((line = read_whole_line (fp)) != NULL)
632 i = clean_line (line);
639 /* First column: Name. A bit of black magic again. The name may be
640 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
641 line. Therefore we will first try to get the complete name
642 until the first space character; if it fails, we assume that the name
643 occupies the whole line. After that we search for the version
644 separator ";", we remove it and check the extension of the file;
645 extension .DIR denotes directory. */
647 tok = strtok(line, " ");
648 if (tok == NULL) tok = line;
649 DEBUGP(("file name: '%s'\n", tok));
650 for (p = tok ; *p && *p != ';' ; p++);
651 if (*p == ';') *p = '\0';
652 p = tok + strlen(tok) - 4;
653 if (!strcmp(p, ".DIR")) *p = '\0';
654 cur.name = xstrdup(tok);
655 DEBUGP(("Name: '%s'\n", cur.name));
657 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
658 the file size to zero as the listing does tell us only the size in
659 filesystem blocks - for an integrity check (when mirroring, for
660 example) we would need the size in bytes. */
664 cur.type = FT_DIRECTORY;
666 DEBUGP(("Directory\n"));
670 cur.type = FT_PLAINFILE;
676 /* Second column, if exists, or the first column of the next line
677 contain file size in blocks. We will skip it. */
679 tok = strtok(NULL, " ");
682 DEBUGP(("Getting additional line\n"));
684 line = read_whole_line (fp);
687 DEBUGP(("empty line read, leaving listing parser\n"));
690 i = clean_line (line);
693 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
697 tok = strtok(line, " ");
699 DEBUGP(("second token: '%s'\n", tok));
701 /* Third/Second column: Date DD-MMM-YYYY. */
703 tok = strtok(NULL, "-");
704 if (tok == NULL) continue;
705 DEBUGP(("day: '%s'\n",tok));
707 tok = strtok(NULL, "-");
710 /* If the server produces garbage like
711 'EA95_0PS.GZ;1 No privilege for attempted operation'
712 the first strtok(NULL, "-") will return everything until the end
713 of the line and only the next strtok() call will return NULL. */
714 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
718 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
719 /* Unknown months are mapped to January */
721 tok = strtok (NULL, " ");
722 if (tok == NULL) continue;
723 year = atoi (tok) - 1900;
724 DEBUGP(("date parsed\n"));
726 /* Fourth/Third column: Time hh:mm[:ss] */
727 tok = strtok (NULL, " ");
728 if (tok == NULL) continue;
732 for (; *p && *p != ':'; ++p);
735 for (; *p && *p != ':'; ++p);
739 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
740 year+1900, month, day, hour, min, sec));
742 /* Build the time-stamp (copy & paste from above) */
743 timestruct.tm_sec = sec;
744 timestruct.tm_min = min;
745 timestruct.tm_hour = hour;
746 timestruct.tm_mday = day;
747 timestruct.tm_mon = month;
748 timestruct.tm_year = year;
749 timestruct.tm_wday = 0;
750 timestruct.tm_yday = 0;
751 timestruct.tm_isdst = -1;
752 cur.tstamp = mktime (×truct); /* store the time-stamp */
754 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
756 /* Skip the fifth column */
758 tok = strtok(NULL, " ");
759 if (tok == NULL) continue;
761 /* Sixth column: Permissions */
763 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissons */
764 if (tok == NULL) continue;
765 tok = strtok(NULL, ")");
768 DEBUGP(("confusing VMS permissions, skipping line\n"));
772 /* Permissions have the format "RWED,RWED,RE" */
773 cur.perms = vmsperms(tok);
774 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
778 /* And put everything into the linked list */
781 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
782 memcpy (l, &cur, sizeof (cur));
783 l->prev = l->next = NULL;
788 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
790 memcpy (l, &cur, sizeof (cur));
802 /* This function switches between the correct parsing routine depending on
803 the SYSTEM_TYPE. The system type should be based on the result of the
804 "SYST" response of the FTP server. According to this response we will
805 use one of the three different listing parsers that cover most of the FTP
806 servers used nowadays. */
809 ftp_parse_ls (const char *file, const enum stype system_type)
814 return ftp_parse_unix_ls (file, 0);
817 /* Detect whether the listing is simulating the UNIX format */
820 fp = fopen (file, "rb");
823 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
828 /* If the first character of the file is '0'-'9', it's WINNT format. */
830 if (c >= '0' && c <='9')
831 return ftp_parse_winnt_ls (file);
833 return ftp_parse_unix_ls (file, 1);
836 return ftp_parse_vms_ls (file);
838 return ftp_parse_unix_ls (file, 1);
840 logprintf (LOG_NOTQUIET, _("\
841 Unsupported listing type, trying Unix listing parser.\n"));
842 return ftp_parse_unix_ls (file, 0);
846 /* Stuff for creating FTP index. */
848 /* The function creates an HTML index containing references to given
849 directories and files on the appropriate host. The references are FTP URLs. */
852 ftp_index (const char *file, struct url *u, struct fileinfo *f)
856 char *htclfile; /* HTML-clean file name */
860 fp = fopen (file, "wb");
863 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
871 char *tmpu, *tmpp; /* temporary, clean user and passwd */
873 tmpu = url_escape (u->user);
874 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
876 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
878 upwd = concat_strings (tmpu, "@", (char *) 0);
884 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
885 fprintf (fp, "<html>\n<head>\n<title>");
886 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
887 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
888 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
889 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
895 /* #### Should we translate the months? Or, even better, use ISO 8601 dates? */
897 static const char *months[] = {
898 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
899 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
901 struct tm *ptm = localtime ((time_t *)&f->tstamp);
903 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
906 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
911 fprintf (fp, _("time unknown "));
915 fprintf (fp, _("File "));
918 fprintf (fp, _("Directory "));
921 fprintf (fp, _("Link "));
924 fprintf (fp, _("Not sure "));
927 htclfile = html_quote_string (f->name);
928 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
931 fprintf (fp, "%s", u->dir);
934 fprintf (fp, "%s", htclfile);
935 if (f->type == FT_DIRECTORY)
937 fprintf (fp, "\">%s", htclfile);
938 if (f->type == FT_DIRECTORY)
940 fprintf (fp, "</a> ");
941 if (f->type == FT_PLAINFILE)
942 fprintf (fp, _(" (%s bytes)"), with_thousand_seps (f->size));
943 else if (f->type == FT_SYMLINK)
944 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
949 fprintf (fp, "</pre>\n</body>\n</html>\n");