1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
43 #include <sys/types.h>
50 #include "convert.h" /* for html_quote_string prototype */
52 extern FILE *output_stream;
54 /* Converts symbolic permissions to number-style ones, e.g. string
55 rwxr-xr-x to 755. For now, it knows nothing of
56 setuid/setgid/sticky. ACLs are ignored. */
/* S points at the nine permission characters; the Unix listing parser
   passes its first token plus one, so the leading file-type character
   is not part of S.  */
58 symperms (const char *s)
/* One pass per rwx triplet; S moves forward three characters each
   time round.  */
64 for (i = 0; i < 3; i++, s += 3)
/* Read contributes 4, write 2 and execute 1.  An `s' in the execute
   position is counted the same as `x'.  */
67 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
68 (s[2] == 'x' || s[2] == 's'));
74 /* Cleans a line of text so that it can be consistently parsed. Destroys
75 <CR> and <LF> in case they occur at the end of the line and
76 replaces all <TAB> characters with <SPACE>. Returns the length of the cleaned line. */
/* LINE is modified in place.  NOTE(review): both end-of-line tests
   below index line[len - 1]; confirm that a zero length is guarded
   before each of them, including the case where the line consists of
   a single <LF>.  */
79 clean_line(char *line)
/* The length is measured once, before anything is stripped.  */
81 int len = strlen (line);
/* First look for a trailing <LF>...  */
83 if (line[len - 1] == '\n')
/* ...then for a <CR> at the end.  */
85 if (line[len - 1] == '\r')
/* Walk the string through the LINE pointer itself and turn every
   <TAB> into a space.  */
87 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
91 /* Convert the Un*x-ish style directory listing stored in FILE to a
92 linked list of fileinfo (system-independent) entries. The contents
93 of FILE are considered to be produced by the standard Unix `ls -la'
94 output (whatever that might be). BSD (no group) and SYSV (with
95 group) listings are handled.
97 The time stamps are stored in a separate variable, time_t
98 compatible (I hope). The timezones are ignored. */
99 static struct fileinfo *
100 ftp_parse_unix_ls (const char *file, int ignore_perms)
103 static const char *months[] = {
104 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
105 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
107 int next, len, i, error, ignore;
108 int year, month, day; /* for time analysis */
110 struct tm timestruct, *tnow;
113 char *line, *tok; /* tokenizer */
114 struct fileinfo *dir, *l, cur; /* list creation */
116 fp = fopen (file, "rb");
119 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
124 /* Line loop to end of file: */
125 while ((line = read_whole_line (fp)) != NULL)
127 len = clean_line (line);
128 /* Skip if total... */
129 if (!strncasecmp (line, "total", 5))
134 /* Get the first token (permissions). */
135 tok = strtok (line, " ");
145 /* Decide whether we deal with a file or a directory. */
149 cur.type = FT_PLAINFILE;
150 DEBUGP (("PLAINFILE; "));
153 cur.type = FT_DIRECTORY;
154 DEBUGP (("DIRECTORY; "));
157 cur.type = FT_SYMLINK;
158 DEBUGP (("SYMLINK; "));
161 cur.type = FT_UNKNOWN;
162 DEBUGP (("UNKNOWN; "));
177 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
180 DEBUGP (("implicit perms %0o; ", cur.perms));
184 cur.perms = symperms (tok + 1);
185 DEBUGP (("perms %0o; ", cur.perms));
188 error = ignore = 0; /* Erroneous and ignoring entries are
189 treated equally for now. */
190 year = hour = min = sec = 0; /* Silence the compiler. */
193 /* While there are tokens on the line, parse them. Next is the
194 number of tokens left until the filename.
196 Use the month-name token as the "anchor" (the place where the
197 position wrt the file name is "known"). When a month name is
198 encountered, `next' is set to 5. Also, the preceding
199 characters are parsed to get the file size.
201 This tactic is quite dubious when it comes to
202 internationalization issues (non-English month names), but it
204 while ((tok = strtok (NULL, " ")) != NULL)
207 if (next < 0) /* a month name was not encountered */
209 for (i = 0; i < 12; i++)
210 if (!strcmp (tok, months[i]))
212 /* If we got a month, it means the token before it is the
213 size, and the filename is three tokens away. */
218 /* Back up to the beginning of the previous token
219 and parse it with str_to_wgint. */
221 while (t > line && ISDIGIT (*t))
225 /* Something has gone wrong during parsing. */
230 size = str_to_wgint (t, NULL, 10);
231 if (size == WGINT_MAX && errno == ERANGE)
232 /* Out of range -- ignore the size. #### Should
233 we refuse to start the download. */
240 DEBUGP (("month: %s; ", months[month]));
243 else if (next == 4) /* days */
245 if (tok[1]) /* two-digit... */
246 day = 10 * (*tok - '0') + tok[1] - '0';
247 else /* ...or one-digit */
249 DEBUGP (("day: %d; ", day));
253 /* This ought to be either the time, or the year. Let's
256 If we have a number x, it's a year. If we have x:y,
257 it's hours and minutes. If we have x:y:z, z are
260 min = hour = sec = 0;
261 /* We must deal with digits. */
264 /* Suppose it's year. */
265 for (; ISDIGIT (*tok); tok++)
266 year = (*tok - '0') + 10 * year;
269 /* This means these were hours! */
273 /* Get the minutes... */
274 for (; ISDIGIT (*tok); tok++)
275 min = (*tok - '0') + 10 * min;
278 /* ...and the seconds. */
280 for (; ISDIGIT (*tok); tok++)
281 sec = (*tok - '0') + 10 * sec;
286 DEBUGP (("year: %d (no tm); ", year));
288 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
290 else if (next == 2) /* The file name */
295 /* Since the file name may contain a SPC, it is possible
296 for strtok to handle it wrong. */
297 fnlen = strlen (tok);
298 if (fnlen < len - (tok - line))
300 /* So we have a SPC in the file name. Restore the
303 /* If the file is a symbolic link, it should have a
305 if (cur.type == FT_SYMLINK)
307 p = strstr (tok, " -> ");
313 cur.linkto = xstrdup (p + 4);
314 DEBUGP (("link to: %s\n", cur.linkto));
315 /* And separate it from the file name. */
319 /* If we have the filename, add it to the list of files or
321 /* "." and ".." are an exception! */
322 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
324 DEBUGP (("\nIgnoring `.' and `..'; "));
328 /* Some FTP sites choose to have ls -F as their default
329 LIST output, which marks the symlinks with a trailing
330 `@', directory names with a trailing `/' and
331 executables with a trailing `*'. This is no problem
332 unless encountering a symbolic link ending with `@',
333 or an executable ending with `*' on a server without
334 default -F output. I believe these cases are very
336 fnlen = strlen (tok); /* re-calculate `fnlen' */
337 cur.name = (char *)xmalloc (fnlen + 1);
338 memcpy (cur.name, tok, fnlen + 1);
341 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
343 cur.name[fnlen - 1] = '\0';
344 DEBUGP (("trailing `/' on dir.\n"));
346 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
348 cur.name[fnlen - 1] = '\0';
349 DEBUGP (("trailing `@' on link.\n"));
351 else if (cur.type == FT_PLAINFILE
352 && (cur.perms & 0111)
353 && cur.name[fnlen - 1] == '*')
355 cur.name[fnlen - 1] = '\0';
356 DEBUGP (("trailing `*' on exec.\n"));
367 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
374 DEBUGP (("Skipping.\n"));
375 xfree_null (cur.name);
376 xfree_null (cur.linkto);
383 l = dir = xnew (struct fileinfo);
384 memcpy (l, &cur, sizeof (cur));
385 l->prev = l->next = NULL;
390 l->next = xnew (struct fileinfo);
392 memcpy (l, &cur, sizeof (cur));
395 /* Get the current time. */
396 timenow = time (NULL);
397 tnow = localtime (&timenow);
398 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
399 timestruct.tm_sec = sec;
400 timestruct.tm_min = min;
401 timestruct.tm_hour = hour;
402 timestruct.tm_mday = day;
403 timestruct.tm_mon = month;
406 /* Some listings will not specify the year if it is "obvious"
407 that the file was from the previous year. E.g. if today
408 is 97-01-12, and you see a file of Dec 15th, its year is
409 1996, not 1997. Thanks to Vladimir Volovich for
411 if (month > tnow->tm_mon)
412 timestruct.tm_year = tnow->tm_year - 1;
414 timestruct.tm_year = tnow->tm_year;
417 timestruct.tm_year = year;
418 if (timestruct.tm_year >= 1900)
419 timestruct.tm_year -= 1900;
420 timestruct.tm_wday = 0;
421 timestruct.tm_yday = 0;
422 timestruct.tm_isdst = -1;
423 l->tstamp = mktime (&timestruct); /* store the time-stamp */
432 static struct fileinfo *
433 ftp_parse_winnt_ls (const char *file)
437 int year, month, day; /* for time analysis */
439 struct tm timestruct;
441 char *line, *tok; /* tokenizer */
442 struct fileinfo *dir, *l, cur; /* list creation */
444 fp = fopen (file, "rb");
447 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
452 /* Line loop to end of file: */
453 while ((line = read_whole_line (fp)) != NULL)
455 len = clean_line (line);
457 /* Extracting name is a bit of black magic and we have to do it
458 before `strtok' inserted extra \0 characters in the line
459 string. For the moment let us just suppose that the name starts at
460 column 39 of the listing. This way we could also recognize
461 filenames that begin with a series of space characters (but who
462 really wants to use such filenames anyway?). */
463 if (len < 40) continue;
465 cur.name = xstrdup(tok);
466 DEBUGP(("Name: '%s'\n", cur.name));
468 /* First column: mm-dd-yy. Should atoi() on the month fail, January is assumed. */
470 tok = strtok(line, "-");
471 if (tok == NULL) continue;
472 month = atoi(tok) - 1;
473 if (month < 0) month = 0;
474 tok = strtok(NULL, "-");
475 if (tok == NULL) continue;
477 tok = strtok(NULL, " ");
478 if (tok == NULL) continue;
480 /* Assuming the epoch starting at 1.1.1970 */
481 if (year <= 70) year += 100;
483 /* Second column: hh:mm[AP]M; the listing does not contain a value for seconds. */
485 tok = strtok(NULL, ":");
486 if (tok == NULL) continue;
488 tok = strtok(NULL, "M");
489 if (tok == NULL) continue;
491 /* Adjust hour from AM/PM. Just for the record, the sequence goes
492 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
494 if (hour == 12) hour = 0;
495 if (*tok == 'P') hour += 12;
497 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
498 year+1900, month, day, hour, min));
500 /* Build the time-stamp (copy & paste from above) */
501 timestruct.tm_sec = 0;
502 timestruct.tm_min = min;
503 timestruct.tm_hour = hour;
504 timestruct.tm_mday = day;
505 timestruct.tm_mon = month;
506 timestruct.tm_year = year;
507 timestruct.tm_wday = 0;
508 timestruct.tm_yday = 0;
509 timestruct.tm_isdst = -1;
510 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
512 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
514 /* Third column: Either file length, or <DIR>. We also set the
515 permissions (guessed as 0644 for plain files and 0755 for
516 directories as the listing does not give us a clue) and filetype
518 tok = strtok(NULL, " ");
519 if (tok == NULL) continue;
520 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
521 if (tok == NULL) continue;
524 cur.type = FT_DIRECTORY;
527 DEBUGP(("Directory\n"));
532 cur.type = FT_PLAINFILE;
534 size = str_to_wgint (tok, NULL, 10);
535 if (size == WGINT_MAX && errno == ERANGE)
536 cur.size = 0; /* overflow */
540 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
545 /* And put everything into the linked list */
548 l = dir = xnew (struct fileinfo);
549 memcpy (l, &cur, sizeof (cur));
550 l->prev = l->next = NULL;
555 l->next = xnew (struct fileinfo);
557 memcpy (l, &cur, sizeof (cur));
568 /* Converts VMS symbolic permissions to number-style ones, e.g. string
569 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
570 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
/* S is the text the VMS listing parser cuts out of the permissions
   column: what follows the first comma, up to the closing
   parenthesis.  */
572 vmsperms (const char *s)
/* A comma ends one permission class: shift what has been collected so
   far to make room for the next three bits.  */
579 case ',': perms <<= 3; break;
580 case 'R': perms |= 4; break;
581 case 'W': perms |= 2; break;
/* Delete is folded into the write bit.  */
582 case 'D': perms |= 2; break;
583 case 'E': perms |= 1; break;
/* Any other character is reported through DEBUGP.  */
584 default: DEBUGP(("wrong VMS permissons!\n"));
592 static struct fileinfo *
593 ftp_parse_vms_ls (const char *file)
596 /* #### A third copy of more-or-less the same array ? */
597 static const char *months[] = {
598 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
599 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
602 int year, month, day; /* for time analysis */
604 struct tm timestruct;
606 char *line, *tok; /* tokenizer */
607 struct fileinfo *dir, *l, cur; /* list creation */
609 fp = fopen (file, "rb");
612 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
617 /* Skip empty line. */
618 line = read_whole_line (fp);
621 /* Skip "Directory PUB$DEVICE[PUB]" */
622 line = read_whole_line (fp);
625 /* Skip empty line. */
626 line = read_whole_line (fp);
629 /* Line loop to end of file: */
630 while ((line = read_whole_line (fp)) != NULL)
633 i = clean_line (line);
640 /* First column: Name. A bit of black magic again. The name may be
641 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
642 line. Therefore we will first try to get the complete name
643 until the first space character; if it fails, we assume that the name
644 occupies the whole line. After that we search for the version
645 separator ";", we remove it and check the extension of the file;
646 extension .DIR denotes directory. */
648 tok = strtok(line, " ");
649 if (tok == NULL) tok = line;
650 DEBUGP(("file name: '%s'\n", tok));
651 for (p = tok ; *p && *p != ';' ; p++);
652 if (*p == ';') *p = '\0';
653 p = tok + strlen(tok) - 4;
654 if (!strcmp(p, ".DIR")) *p = '\0';
655 cur.name = xstrdup(tok);
656 DEBUGP(("Name: '%s'\n", cur.name));
658 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
659 the file size to zero as the listing does tell us only the size in
660 filesystem blocks - for an integrity check (when mirroring, for
661 example) we would need the size in bytes. */
665 cur.type = FT_DIRECTORY;
667 DEBUGP(("Directory\n"));
671 cur.type = FT_PLAINFILE;
677 /* The second column, if it exists, or else the first column of the next
678 line, contains the file size in blocks. We will skip it. */
680 tok = strtok(NULL, " ");
683 DEBUGP(("Getting additional line\n"));
685 line = read_whole_line (fp);
688 DEBUGP(("empty line read, leaving listing parser\n"));
691 i = clean_line (line);
694 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
698 tok = strtok(line, " ");
700 DEBUGP(("second token: '%s'\n", tok));
702 /* Third/Second column: Date DD-MMM-YYYY. */
704 tok = strtok(NULL, "-");
705 if (tok == NULL) continue;
706 DEBUGP(("day: '%s'\n",tok));
708 tok = strtok(NULL, "-");
711 /* If the server produces garbage like
712 'EA95_0PS.GZ;1 No privilege for attempted operation'
713 the first strtok(NULL, "-") will return everything until the end
714 of the line and only the next strtok() call will return NULL. */
715 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
719 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
720 /* Unknown months are mapped to January */
722 tok = strtok (NULL, " ");
723 if (tok == NULL) continue;
724 year = atoi (tok) - 1900;
725 DEBUGP(("date parsed\n"));
727 /* Fourth/Third column: Time hh:mm[:ss] */
728 tok = strtok (NULL, " ");
729 if (tok == NULL) continue;
733 for (; *p && *p != ':'; ++p);
736 for (; *p && *p != ':'; ++p);
740 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
741 year+1900, month, day, hour, min, sec));
743 /* Build the time-stamp (copy & paste from above) */
744 timestruct.tm_sec = sec;
745 timestruct.tm_min = min;
746 timestruct.tm_hour = hour;
747 timestruct.tm_mday = day;
748 timestruct.tm_mon = month;
749 timestruct.tm_year = year;
750 timestruct.tm_wday = 0;
751 timestruct.tm_yday = 0;
752 timestruct.tm_isdst = -1;
753 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
755 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
757 /* Skip the fifth column */
759 tok = strtok(NULL, " ");
760 if (tok == NULL) continue;
762 /* Sixth column: Permissions */
764 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
765 if (tok == NULL) continue;
766 tok = strtok(NULL, ")");
769 DEBUGP(("confusing VMS permissions, skipping line\n"));
773 /* Permissions have the format "RWED,RWED,RE" */
774 cur.perms = vmsperms(tok);
775 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
779 /* And put everything into the linked list */
782 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
783 memcpy (l, &cur, sizeof (cur));
784 l->prev = l->next = NULL;
789 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
791 memcpy (l, &cur, sizeof (cur));
803 /* This function switches between the correct parsing routine depending on
804 the SYSTEM_TYPE. The system type should be based on the result of the
805 "SYST" response of the FTP server. According to this response we will
806 use one of the three different listing parsers that cover most of the FTP
807 servers used nowadays. */
/* Hands FILE to one of the three listing parsers in this file.
   ftp_parse_unix_ls is called with its ignore_perms argument set to 0
   on the first path and on the "unsupported" fallback, and set to 1 on
   the two other paths that reach it.  NOTE(review): confirm which
   SYSTEM_TYPE value selects which path.  */
810 ftp_parse_ls (const char *file, const enum stype system_type)
815 return ftp_parse_unix_ls (file, 0);
818 /* Detect whether the listing is simulating the UNIX format */
/* FILE is opened here as well, so that its first character can be
   examined before a parser is chosen.  */
821 fp = fopen (file, "rb");
/* Log FILE together with the strerror text for errno.  */
824 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
829 /* If the first character of the file is '0'-'9', it's WINNT
831 if (c >= '0' && c <='9')
832 return ftp_parse_winnt_ls (file);
834 return ftp_parse_unix_ls (file, 1);
837 return ftp_parse_vms_ls (file);
839 return ftp_parse_unix_ls (file, 1);
841 logprintf (LOG_NOTQUIET, _("\
842 Unsupported listing type, trying Unix listing parser.\n"));
843 return ftp_parse_unix_ls (file, 0);
847 /* Stuff for creating FTP index. */
849 /* The function creates an HTML index containing references to given
850 directories and files on the appropriate host. The references are
/* Writes an HTML page to FILE: a title and a heading that name U's
   directory, host and port, then, for the fileinfo F, a time stamp, a
   type label, an ftp:// link on the name and either the size or the
   link target.  Only the entry name is passed through
   html_quote_string.  NOTE(review): u->dir, u->host and f->linkto are
   printed as they are; confirm that they cannot carry HTML
   metacharacters.  */
853 ftp_index (const char *file, struct url *u, struct fileinfo *f)
857 char *htclfile; /* HTML-clean file name */
/* The index is written in binary mode.  */
861 fp = fopen (file, "wb");
/* Log FILE together with the strerror text for errno.  */
864 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
872 char *tmpu, *tmpp; /* temporary, clean user and passwd */
/* User name and password go through url_escape before they are joined
   into the "user:passwd@" or "user@" prefix that is printed in front
   of the host in the links.  The password is escaped only when
   u->passwd is set.  */
874 tmpu = url_escape (u->user);
875 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
877 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
879 upwd = concat_strings (tmpu, "@", (char *) 0);
/* Page header.  The same "Index of ..." text is emitted twice, once
   inside <title> and once inside <h1>.  NOTE(review): the port is
   formatted with %d in these two calls but with %hu where the link is
   printed; confirm the type of u->port.  */
885 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
886 fprintf (fp, "<html>\n<head>\n<title>");
887 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
888 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
889 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
890 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
896 /* #### Should we translate the months? Or, even better, use
898 static const char *months[] = {
899 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
900 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
902 struct tm *ptm = localtime ((time_t *)&f->tstamp);
904 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
907 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
912 fprintf (fp, _("time unknown "));
916 fprintf (fp, _("File "));
919 fprintf (fp, _("Directory "));
922 fprintf (fp, _("Link "));
925 fprintf (fp, _("Not sure "));
928 htclfile = html_quote_string (f->name);
929 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
932 fprintf (fp, "%s", u->dir);
935 fprintf (fp, "%s", htclfile);
936 if (f->type == FT_DIRECTORY)
938 fprintf (fp, "\">%s", htclfile);
939 if (f->type == FT_DIRECTORY)
941 fprintf (fp, "</a> ");
942 if (f->type == FT_PLAINFILE)
943 fprintf (fp, _(" (%s bytes)"), with_thousand_seps (f->size));
944 else if (f->type == FT_SYMLINK)
945 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
950 fprintf (fp, "</pre>\n</body>\n</html>\n");