1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
43 #include <sys/types.h>
51 extern FILE *output_stream;
53 /* Converts symbolic permissions to number-style ones, e.g. string
54 rwxr-xr-x to 755. For now, it knows nothing of
55 setuid/setgid/sticky. ACLs are ignored. */
57 symperms (const char *s)
63 for (i = 0; i < 3; i++, s += 3)
66 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
67 (s[2] == 'x' || s[2] == 's'));
73 /* Cleans a line of text so that it can be consistently parsed. Destroys
74 <CR> and <LF> in case that they occur at the end of the line and
75 replaces all <TAB> characters with <SPACE>. Returns the length of the
78 clean_line(char *line)
80 int len = strlen (line);
82 if (line[len - 1] == '\n')
84 if (line[len - 1] == '\r')
86 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
90 /* Convert the Un*x-ish style directory listing stored in FILE to a
91 linked list of fileinfo (system-independent) entries. The contents
92 of FILE are considered to be produced by the standard Unix `ls -la'
93 output (whatever that might be). BSD (no group) and SYSV (with
94 group) listings are handled.
96 The time stamps are stored in a separate variable, time_t
97 compatible (I hope). The timezones are ignored. */
98 static struct fileinfo *
99 ftp_parse_unix_ls (const char *file, int ignore_perms)
102 static const char *months[] = {
103 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
104 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
106 int next, len, i, error, ignore;
107 int year, month, day; /* for time analysis */
109 struct tm timestruct, *tnow;
112 char *line, *tok; /* tokenizer */
113 struct fileinfo *dir, *l, cur; /* list creation */
115 fp = fopen (file, "rb");
118 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
123 /* Line loop to end of file: */
124 while ((line = read_whole_line (fp)))
126 len = clean_line (line);
127 /* Skip if total... */
128 if (!strncasecmp (line, "total", 5))
133 /* Get the first token (permissions). */
134 tok = strtok (line, " ");
144 /* Decide whether we deal with a file or a directory. */
148 cur.type = FT_PLAINFILE;
149 DEBUGP (("PLAINFILE; "));
152 cur.type = FT_DIRECTORY;
153 DEBUGP (("DIRECTORY; "));
156 cur.type = FT_SYMLINK;
157 DEBUGP (("SYMLINK; "));
160 cur.type = FT_UNKNOWN;
161 DEBUGP (("UNKNOWN; "));
176 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
179 DEBUGP (("implicit perms %0o; ", cur.perms));
183 cur.perms = symperms (tok + 1);
184 DEBUGP (("perms %0o; ", cur.perms));
187 error = ignore = 0; /* Erroneous and ignoring entries are
188 treated equally for now. */
189 year = hour = min = sec = 0; /* Silence the compiler. */
192 /* While there are tokens on the line, parse them. Next is the
193 number of tokens left until the filename.
195 Use the month-name token as the "anchor" (the place where the
196 position wrt the file name is "known"). When a month name is
197 encountered, `next' is set to 5. Also, the preceding
198 characters are parsed to get the file size.
200 This tactic is quite dubious when it comes to
201 internationalization issues (non-English month names), but it
203 while ((tok = strtok (NULL, " ")))
206 if (next < 0) /* a month name was not encountered */
208 for (i = 0; i < 12; i++)
209 if (!strcmp (tok, months[i]))
211 /* If we got a month, it means the token before it is the
212 size, and the filename is three tokens away. */
217 /* Back up to the beginning of the previous token
218 and parse it with str_to_wgint. */
220 while (t > line && ISDIGIT (*t))
224 /* Something has gone wrong during parsing. */
229 size = str_to_wgint (t, NULL, 10);
230 if (size == WGINT_MAX && errno == ERANGE)
231 /* Out of range -- ignore the size. #### Should
232 we refuse to start the download. */
239 DEBUGP (("month: %s; ", months[month]));
242 else if (next == 4) /* days */
244 if (tok[1]) /* two-digit... */
245 day = 10 * (*tok - '0') + tok[1] - '0';
246 else /* ...or one-digit */
248 DEBUGP (("day: %d; ", day));
252 /* This ought to be either the time, or the year. Let's
255 If we have a number x, it's a year. If we have x:y,
256 it's hours and minutes. If we have x:y:z, z are
259 min = hour = sec = 0;
260 /* We must deal with digits. */
263 /* Suppose it's year. */
264 for (; ISDIGIT (*tok); tok++)
265 year = (*tok - '0') + 10 * year;
268 /* This means these were hours! */
272 /* Get the minutes... */
273 for (; ISDIGIT (*tok); tok++)
274 min = (*tok - '0') + 10 * min;
277 /* ...and the seconds. */
279 for (; ISDIGIT (*tok); tok++)
280 sec = (*tok - '0') + 10 * sec;
285 DEBUGP (("year: %d (no tm); ", year));
287 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
289 else if (next == 2) /* The file name */
294 /* Since the file name may contain a SPC, it is possible
295 for strtok to handle it wrong. */
296 fnlen = strlen (tok);
297 if (fnlen < len - (tok - line))
299 /* So we have a SPC in the file name. Restore the
302 /* If the file is a symbolic link, it should have a
304 if (cur.type == FT_SYMLINK)
306 p = strstr (tok, " -> ");
312 cur.linkto = xstrdup (p + 4);
313 DEBUGP (("link to: %s\n", cur.linkto));
314 /* And separate it from the file name. */
318 /* If we have the filename, add it to the list of files or
320 /* "." and ".." are an exception! */
321 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
323 DEBUGP (("\nIgnoring `.' and `..'; "));
327 /* Some FTP sites choose to have ls -F as their default
328 LIST output, which marks the symlinks with a trailing
329 `@', directory names with a trailing `/' and
330 executables with a trailing `*'. This is no problem
331 unless encountering a symbolic link ending with `@',
332 or an executable ending with `*' on a server without
333 default -F output. I believe these cases are very
335 fnlen = strlen (tok); /* re-calculate `fnlen' */
336 cur.name = (char *)xmalloc (fnlen + 1);
337 memcpy (cur.name, tok, fnlen + 1);
340 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
342 cur.name[fnlen - 1] = '\0';
343 DEBUGP (("trailing `/' on dir.\n"));
345 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
347 cur.name[fnlen - 1] = '\0';
348 DEBUGP (("trailing `@' on link.\n"));
350 else if (cur.type == FT_PLAINFILE
351 && (cur.perms & 0111)
352 && cur.name[fnlen - 1] == '*')
354 cur.name[fnlen - 1] = '\0';
355 DEBUGP (("trailing `*' on exec.\n"));
366 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
373 DEBUGP (("Skipping.\n"));
374 xfree_null (cur.name);
375 xfree_null (cur.linkto);
382 l = dir = xnew (struct fileinfo);
383 memcpy (l, &cur, sizeof (cur));
384 l->prev = l->next = NULL;
389 l->next = xnew (struct fileinfo);
391 memcpy (l, &cur, sizeof (cur));
394 /* Get the current time. */
395 timenow = time (NULL);
396 tnow = localtime (&timenow);
397 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
398 timestruct.tm_sec = sec;
399 timestruct.tm_min = min;
400 timestruct.tm_hour = hour;
401 timestruct.tm_mday = day;
402 timestruct.tm_mon = month;
405 /* Some listings will not specify the year if it is "obvious"
406 that the file was from the previous year. E.g. if today
407 is 97-01-12, and you see a file of Dec 15th, its year is
408 1996, not 1997. Thanks to Vladimir Volovich for
410 if (month > tnow->tm_mon)
411 timestruct.tm_year = tnow->tm_year - 1;
413 timestruct.tm_year = tnow->tm_year;
416 timestruct.tm_year = year;
417 if (timestruct.tm_year >= 1900)
418 timestruct.tm_year -= 1900;
419 timestruct.tm_wday = 0;
420 timestruct.tm_yday = 0;
421 timestruct.tm_isdst = -1;
422 l->tstamp = mktime (&timestruct); /* store the time-stamp */
431 static struct fileinfo *
432 ftp_parse_winnt_ls (const char *file)
436 int year, month, day; /* for time analysis */
438 struct tm timestruct;
440 char *line, *tok; /* tokenizer */
441 struct fileinfo *dir, *l, cur; /* list creation */
443 fp = fopen (file, "rb");
446 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
451 /* Line loop to end of file: */
452 while ((line = read_whole_line (fp)))
454 len = clean_line (line);
456 /* Extracting the name is a bit of black magic and we have to do it
457 before `strtok' inserts extra \0 characters in the line
458 string. For the moment let us just suppose that the name starts at
459 column 39 of the listing. This way we could also recognize
460 filenames that begin with a series of space characters (but who
461 really wants to use such filenames anyway?). */
462 if (len < 40) continue;
464 cur.name = xstrdup(tok);
465 DEBUGP(("Name: '%s'\n", cur.name));
467 /* First column: mm-dd-yy. Should atoi() on the month fail, January
469 tok = strtok(line, "-");
470 if (tok == NULL) continue;
471 month = atoi(tok) - 1;
472 if (month < 0) month = 0;
473 tok = strtok(NULL, "-");
474 if (tok == NULL) continue;
476 tok = strtok(NULL, " ");
477 if (tok == NULL) continue;
479 /* Assuming the epoch starting at 1.1.1970 */
480 if (year <= 70) year += 100;
482 /* Second column: hh:mm[AP]M, listing does not contain value for
484 tok = strtok(NULL, ":");
485 if (tok == NULL) continue;
487 tok = strtok(NULL, "M");
488 if (tok == NULL) continue;
490 /* Adjust hour from AM/PM. Just for the record, the sequence goes
491 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
493 if (hour == 12) hour = 0;
494 if (*tok == 'P') hour += 12;
496 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
497 year+1900, month, day, hour, min));
499 /* Build the time-stamp (copy & paste from above) */
500 timestruct.tm_sec = 0;
501 timestruct.tm_min = min;
502 timestruct.tm_hour = hour;
503 timestruct.tm_mday = day;
504 timestruct.tm_mon = month;
505 timestruct.tm_year = year;
506 timestruct.tm_wday = 0;
507 timestruct.tm_yday = 0;
508 timestruct.tm_isdst = -1;
509 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
511 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
513 /* Third column: Either file length, or <DIR>. We also set the
514 permissions (guessed as 0644 for plain files and 0755 for
515 directories as the listing does not give us a clue) and filetype
517 tok = strtok(NULL, " ");
518 if (tok == NULL) continue;
519 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
520 if (tok == NULL) continue;
523 cur.type = FT_DIRECTORY;
526 DEBUGP(("Directory\n"));
531 cur.type = FT_PLAINFILE;
533 size = str_to_wgint (tok, NULL, 10);
534 if (size == WGINT_MAX && errno == ERANGE)
535 cur.size = 0; /* overflow */
539 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
544 /* And put everything into the linked list */
547 l = dir = xnew (struct fileinfo);
548 memcpy (l, &cur, sizeof (cur));
549 l->prev = l->next = NULL;
554 l->next = xnew (struct fileinfo);
556 memcpy (l, &cur, sizeof (cur));
567 /* Converts VMS symbolic permissions to number-style ones, e.g. string
568 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
569 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
571 vmsperms (const char *s)
578 case ',': perms <<= 3; break;
579 case 'R': perms |= 4; break;
580 case 'W': perms |= 2; break;
581 case 'D': perms |= 2; break;
582 case 'E': perms |= 1; break;
583 default: DEBUGP(("wrong VMS permissons!\n"));
591 static struct fileinfo *
592 ftp_parse_vms_ls (const char *file)
595 /* #### A third copy of more-or-less the same array ? */
596 static const char *months[] = {
597 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
598 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
601 int year, month, day; /* for time analysis */
603 struct tm timestruct;
605 char *line, *tok; /* tokenizer */
606 struct fileinfo *dir, *l, cur; /* list creation */
608 fp = fopen (file, "rb");
611 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
616 /* Skip empty line. */
617 line = read_whole_line (fp);
621 /* Skip "Directory PUB$DEVICE[PUB]" */
622 line = read_whole_line (fp);
626 /* Skip empty line. */
627 line = read_whole_line (fp);
631 /* Line loop to end of file: */
632 while ((line = read_whole_line (fp)))
635 i = clean_line (line);
642 /* First column: Name. A bit of black magic again. The name may be
643 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
644 line. Therefore we will first try to get the complete name
645 until the first space character; if it fails, we assume that the name
646 occupies the whole line. After that we search for the version
647 separator ";", we remove it and check the extension of the file;
648 extension .DIR denotes directory. */
650 tok = strtok(line, " ");
651 if (tok == NULL) tok = line;
652 DEBUGP(("file name: '%s'\n", tok));
653 for (p = tok ; *p && *p != ';' ; p++);
654 if (*p == ';') *p = '\0';
655 p = tok + strlen(tok) - 4;
656 if (!strcmp(p, ".DIR")) *p = '\0';
657 cur.name = xstrdup(tok);
658 DEBUGP(("Name: '%s'\n", cur.name));
660 /* If the name ends in .DIR or .DIR;#, it's a directory. We also set
661 the file size to zero as the listing only tells us the size in
662 filesystem blocks - for an integrity check (when mirroring, for
663 example) we would need the size in bytes. */
667 cur.type = FT_DIRECTORY;
669 DEBUGP(("Directory\n"));
673 cur.type = FT_PLAINFILE;
679 /* Second column, if exists, or the first column of the next line
680 contain file size in blocks. We will skip it. */
682 tok = strtok(NULL, " ");
685 DEBUGP(("Getting additional line\n"));
687 line = read_whole_line (fp);
690 DEBUGP(("empty line read, leaving listing parser\n"));
693 i = clean_line (line);
696 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
700 tok = strtok(line, " ");
702 DEBUGP(("second token: '%s'\n", tok));
704 /* Third/Second column: Date DD-MMM-YYYY. */
706 tok = strtok(NULL, "-");
707 if (tok == NULL) continue;
708 DEBUGP(("day: '%s'\n",tok));
710 tok = strtok(NULL, "-");
713 /* If the server produces garbage like
714 'EA95_0PS.GZ;1 No privilege for attempted operation'
715 the first strtok(NULL, "-") will return everything until the end
716 of the line and only the next strtok() call will return NULL. */
717 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
721 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
722 /* Unknown months are mapped to January */
724 tok = strtok (NULL, " ");
725 if (tok == NULL) continue;
726 year = atoi (tok) - 1900;
727 DEBUGP(("date parsed\n"));
729 /* Fourth/Third column: Time hh:mm[:ss] */
730 tok = strtok (NULL, " ");
731 if (tok == NULL) continue;
732 hour = min = sec = 0;
735 for (; *p && *p != ':'; ++p);
738 for (; *p && *p != ':'; ++p);
742 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
743 year+1900, month, day, hour, min, sec));
745 /* Build the time-stamp (copy & paste from above) */
746 timestruct.tm_sec = sec;
747 timestruct.tm_min = min;
748 timestruct.tm_hour = hour;
749 timestruct.tm_mday = day;
750 timestruct.tm_mon = month;
751 timestruct.tm_year = year;
752 timestruct.tm_wday = 0;
753 timestruct.tm_yday = 0;
754 timestruct.tm_isdst = -1;
755 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
757 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
759 /* Skip the fifth column */
761 tok = strtok(NULL, " ");
762 if (tok == NULL) continue;
764 /* Sixth column: Permissions */
766 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
767 if (tok == NULL) continue;
768 tok = strtok(NULL, ")");
771 DEBUGP(("confusing VMS permissions, skipping line\n"));
775 /* Permissions have the format "RWED,RWED,RE" */
776 cur.perms = vmsperms(tok);
777 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
781 /* And put everything into the linked list */
784 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
785 memcpy (l, &cur, sizeof (cur));
786 l->prev = l->next = NULL;
791 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
793 memcpy (l, &cur, sizeof (cur));
805 /* This function switches between the correct parsing routine depending on
806 the SYSTEM_TYPE. The system type should be based on the result of the
807 "SYST" response of the FTP server. According to this response we will
808 use one of the three different listing parsers that cover most of the FTP
809 servers used nowadays. */
812 ftp_parse_ls (const char *file, const enum stype system_type)
817 return ftp_parse_unix_ls (file, 0);
820 /* Detect whether the listing is simulating the UNIX format */
823 fp = fopen (file, "rb");
826 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
831 /* If the first character of the file is '0'-'9', it's WINNT
833 if (c >= '0' && c <='9')
834 return ftp_parse_winnt_ls (file);
836 return ftp_parse_unix_ls (file, 1);
839 return ftp_parse_vms_ls (file);
841 return ftp_parse_unix_ls (file, 1);
843 logprintf (LOG_NOTQUIET, _("\
844 Unsupported listing type, trying Unix listing parser.\n"));
845 return ftp_parse_unix_ls (file, 0);
849 /* Stuff for creating FTP index. */
851 /* The function creates an HTML index containing references to given
852 directories and files on the appropriate host. The references are
855 ftp_index (const char *file, struct url *u, struct fileinfo *f)
859 char *htclfile; /* HTML-clean file name */
863 fp = fopen (file, "wb");
866 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
874 char *tmpu, *tmpp; /* temporary, clean user and passwd */
876 tmpu = url_escape (u->user);
877 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
878 upwd = (char *)xmalloc (strlen (tmpu)
879 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
880 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
886 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
887 fprintf (fp, "<html>\n<head>\n<title>");
888 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
889 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
890 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
891 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
897 /* #### Should we translate the months? Or, even better, use
899 static const char *months[] = {
900 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
901 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
903 struct tm *ptm = localtime ((time_t *)&f->tstamp);
905 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
908 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
913 fprintf (fp, _("time unknown "));
917 fprintf (fp, _("File "));
920 fprintf (fp, _("Directory "));
923 fprintf (fp, _("Link "));
926 fprintf (fp, _("Not sure "));
929 htclfile = html_quote_string (f->name);
930 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
933 fprintf (fp, "%s", u->dir);
936 fprintf (fp, "%s", htclfile);
937 if (f->type == FT_DIRECTORY)
939 fprintf (fp, "\">%s", htclfile);
940 if (f->type == FT_DIRECTORY)
942 fprintf (fp, "</a> ");
943 if (f->type == FT_PLAINFILE)
944 fprintf (fp, _(" (%s bytes)"), legible (f->size));
945 else if (f->type == FT_SYMLINK)
946 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
951 fprintf (fp, "</pre>\n</body>\n</html>\n");