1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
43 #include <sys/types.h>
51 extern FILE *output_stream;
53 /* Converts symbolic permissions to number-style ones, e.g. string
54 rwxr-xr-x to 755. For now, it knows nothing of
55 setuid/setgid/sticky. ACLs are ignored. */
57 symperms (const char *s)
63 for (i = 0; i < 3; i++, s += 3)
66 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
67 (s[2] == 'x' || s[2] == 's'));
73 /* Cleans a line of text so that it can be consistently parsed. Destroys
74 <CR> and <LF> in case they occur at the end of the line and
75 replaces all <TAB> characters with <SPACE>. Returns the length of the
78 clean_line(char *line)
80 int len = strlen (line);
82 if (line[len - 1] == '\n')
84 if (line[len - 1] == '\r')
86 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
90 /* Convert the Un*x-ish style directory listing stored in FILE to a
91 linked list of fileinfo (system-independent) entries. The contents
92 of FILE are considered to be produced by the standard Unix `ls -la'
93 output (whatever that might be). BSD (no group) and SYSV (with
94 group) listings are handled.
96 The time stamps are stored in a separate variable, time_t
97 compatible (I hope). The timezones are ignored. */
98 static struct fileinfo *
99 ftp_parse_unix_ls (const char *file, int ignore_perms)
102 static const char *months[] = {
103 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
104 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
106 int next, len, i, error, ignore;
107 int year, month, day; /* for time analysis */
109 struct tm timestruct, *tnow;
112 char *line, *tok; /* tokenizer */
113 struct fileinfo *dir, *l, cur; /* list creation */
115 fp = fopen (file, "rb");
118 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
123 /* Line loop to end of file: */
124 while ((line = read_whole_line (fp)))
126 len = clean_line (line);
127 /* Skip if total... */
128 if (!strncasecmp (line, "total", 5))
133 /* Get the first token (permissions). */
134 tok = strtok (line, " ");
144 /* Decide whether we deal with a file or a directory. */
148 cur.type = FT_PLAINFILE;
149 DEBUGP (("PLAINFILE; "));
152 cur.type = FT_DIRECTORY;
153 DEBUGP (("DIRECTORY; "));
156 cur.type = FT_SYMLINK;
157 DEBUGP (("SYMLINK; "));
160 cur.type = FT_UNKNOWN;
161 DEBUGP (("UNKNOWN; "));
176 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
179 DEBUGP (("implicit perms %0o; ", cur.perms));
183 cur.perms = symperms (tok + 1);
184 DEBUGP (("perms %0o; ", cur.perms));
187 error = ignore = 0; /* Erroneous and ignoring entries are
188 treated equally for now. */
189 year = hour = min = sec = 0; /* Silence the compiler. */
192 /* While there are tokens on the line, parse them. Next is the
193 number of tokens left until the filename.
195 Use the month-name token as the "anchor" (the place where the
196 position wrt the file name is "known"). When a month name is
197 encountered, `next' is set to 5. Also, the preceding
198 characters are parsed to get the file size.
200 This tactic is quite dubious when it comes to
201 internationalization issues (non-English month names), but it
203 while ((tok = strtok (NULL, " ")))
206 if (next < 0) /* a month name was not encountered */
208 for (i = 0; i < 12; i++)
209 if (!strcmp (tok, months[i]))
211 /* If we got a month, it means the token before it is the
212 size, and the filename is three tokens away. */
218 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
219 cur.size += mul * (*t - '0');
222 /* Something is seriously wrong. */
228 DEBUGP (("month: %s; ", months[month]));
231 else if (next == 4) /* days */
233 if (tok[1]) /* two-digit... */
234 day = 10 * (*tok - '0') + tok[1] - '0';
235 else /* ...or one-digit */
237 DEBUGP (("day: %d; ", day));
241 /* This ought to be either the time, or the year. Let's
244 If we have a number x, it's a year. If we have x:y,
245 it's hours and minutes. If we have x:y:z, z are
248 min = hour = sec = 0;
249 /* We must deal with digits. */
252 /* Suppose it's year. */
253 for (; ISDIGIT (*tok); tok++)
254 year = (*tok - '0') + 10 * year;
257 /* This means these were hours! */
261 /* Get the minutes... */
262 for (; ISDIGIT (*tok); tok++)
263 min = (*tok - '0') + 10 * min;
266 /* ...and the seconds. */
268 for (; ISDIGIT (*tok); tok++)
269 sec = (*tok - '0') + 10 * sec;
274 DEBUGP (("year: %d (no tm); ", year));
276 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
278 else if (next == 2) /* The file name */
283 /* Since the file name may contain a SPC, it is possible
284 for strtok to handle it wrong. */
285 fnlen = strlen (tok);
286 if (fnlen < len - (tok - line))
288 /* So we have a SPC in the file name. Restore the
291 /* If the file is a symbolic link, it should have a
293 if (cur.type == FT_SYMLINK)
295 p = strstr (tok, " -> ");
301 cur.linkto = xstrdup (p + 4);
302 DEBUGP (("link to: %s\n", cur.linkto));
303 /* And separate it from the file name. */
307 /* If we have the filename, add it to the list of files or
309 /* "." and ".." are an exception! */
310 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
312 DEBUGP (("\nIgnoring `.' and `..'; "));
316 /* Some FTP sites choose to have ls -F as their default
317 LIST output, which marks the symlinks with a trailing
318 `@', directory names with a trailing `/' and
319 executables with a trailing `*'. This is no problem
320 unless encountering a symbolic link ending with `@',
321 or an executable ending with `*' on a server without
322 default -F output. I believe these cases are very
324 fnlen = strlen (tok); /* re-calculate `fnlen' */
325 cur.name = (char *)xmalloc (fnlen + 1);
326 memcpy (cur.name, tok, fnlen + 1);
329 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
331 cur.name[fnlen - 1] = '\0';
332 DEBUGP (("trailing `/' on dir.\n"));
334 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
336 cur.name[fnlen - 1] = '\0';
337 DEBUGP (("trailing `@' on link.\n"));
339 else if (cur.type == FT_PLAINFILE
340 && (cur.perms & 0111)
341 && cur.name[fnlen - 1] == '*')
343 cur.name[fnlen - 1] = '\0';
344 DEBUGP (("trailing `*' on exec.\n"));
355 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
362 DEBUGP (("Skipping.\n"));
363 xfree_null (cur.name);
364 xfree_null (cur.linkto);
371 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
372 memcpy (l, &cur, sizeof (cur));
373 l->prev = l->next = NULL;
378 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
380 memcpy (l, &cur, sizeof (cur));
383 /* Get the current time. */
384 timenow = time (NULL);
385 tnow = localtime (&timenow);
386 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
387 timestruct.tm_sec = sec;
388 timestruct.tm_min = min;
389 timestruct.tm_hour = hour;
390 timestruct.tm_mday = day;
391 timestruct.tm_mon = month;
394 /* Some listings will not specify the year if it is "obvious"
395 that the file was from the previous year. E.g. if today
396 is 97-01-12, and you see a file of Dec 15th, its year is
397 1996, not 1997. Thanks to Vladimir Volovich for
399 if (month > tnow->tm_mon)
400 timestruct.tm_year = tnow->tm_year - 1;
402 timestruct.tm_year = tnow->tm_year;
405 timestruct.tm_year = year;
406 if (timestruct.tm_year >= 1900)
407 timestruct.tm_year -= 1900;
408 timestruct.tm_wday = 0;
409 timestruct.tm_yday = 0;
410 timestruct.tm_isdst = -1;
411 l->tstamp = mktime (&timestruct); /* store the time-stamp */
420 static struct fileinfo *
421 ftp_parse_winnt_ls (const char *file)
425 int year, month, day; /* for time analysis */
427 struct tm timestruct;
429 char *line, *tok; /* tokenizer */
430 struct fileinfo *dir, *l, cur; /* list creation */
432 fp = fopen (file, "rb");
435 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
440 /* Line loop to end of file: */
441 while ((line = read_whole_line (fp)))
443 len = clean_line (line);
445 /* Extracting name is a bit of black magic and we have to do it
446 before `strtok' inserted extra \0 characters in the line
447 string. For the moment let us just suppose that the name starts at
448 column 39 of the listing. This way we could also recognize
449 filenames that begin with a series of space characters (but who
450 really wants to use such filenames anyway?). */
451 if (len < 40) continue;
453 cur.name = xstrdup(tok);
454 DEBUGP(("Name: '%s'\n", cur.name));
456 /* First column: mm-dd-yy. Should atoi() on the month fail, January is assumed. */
458 tok = strtok(line, "-");
459 if (tok == NULL) continue;
460 month = atoi(tok) - 1;
461 if (month < 0) month = 0;
462 tok = strtok(NULL, "-");
463 if (tok == NULL) continue;
465 tok = strtok(NULL, " ");
466 if (tok == NULL) continue;
468 /* Assuming the epoch starting at 1.1.1970 */
469 if (year <= 70) year += 100;
471 /* Second column: hh:mm[AP]M; the listing does not contain a value for seconds. */
473 tok = strtok(NULL, ":");
474 if (tok == NULL) continue;
476 tok = strtok(NULL, "M");
477 if (tok == NULL) continue;
479 /* Adjust hour from AM/PM. Just for the record, the sequence goes
480 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
482 if (hour == 12) hour = 0;
483 if (*tok == 'P') hour += 12;
485 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
486 year+1900, month, day, hour, min));
488 /* Build the time-stamp (copy & paste from above) */
489 timestruct.tm_sec = 0;
490 timestruct.tm_min = min;
491 timestruct.tm_hour = hour;
492 timestruct.tm_mday = day;
493 timestruct.tm_mon = month;
494 timestruct.tm_year = year;
495 timestruct.tm_wday = 0;
496 timestruct.tm_yday = 0;
497 timestruct.tm_isdst = -1;
498 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
500 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
502 /* Third column: Either file length, or <DIR>. We also set the
503 permissions (guessed as 0644 for plain files and 0755 for
504 directories as the listing does not give us a clue) and filetype
506 tok = strtok(NULL, " ");
507 if (tok == NULL) continue;
508 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
509 if (tok == NULL) continue;
512 cur.type = FT_DIRECTORY;
515 DEBUGP(("Directory\n"));
519 cur.type = FT_PLAINFILE;
520 cur.size = atoi(tok);
522 DEBUGP(("File, size %ld bytes\n", cur.size));
527 /* And put everything into the linked list */
530 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
531 memcpy (l, &cur, sizeof (cur));
532 l->prev = l->next = NULL;
537 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
539 memcpy (l, &cur, sizeof (cur));
550 /* Converts VMS symbolic permissions to number-style ones, e.g. string
551 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
552 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
554 vmsperms (const char *s)
561 case ',': perms <<= 3; break;
562 case 'R': perms |= 4; break;
563 case 'W': perms |= 2; break;
564 case 'D': perms |= 2; break;
565 case 'E': perms |= 1; break;
566 default: DEBUGP(("wrong VMS permissons!\n"));
574 static struct fileinfo *
575 ftp_parse_vms_ls (const char *file)
578 /* #### A third copy of more-or-less the same array ? */
579 static const char *months[] = {
580 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
581 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
584 int year, month, day; /* for time analysis */
586 struct tm timestruct;
588 char *line, *tok; /* tokenizer */
589 struct fileinfo *dir, *l, cur; /* list creation */
591 fp = fopen (file, "rb");
594 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
599 /* Skip empty line. */
600 line = read_whole_line (fp);
604 /* Skip "Directory PUB$DEVICE[PUB]" */
605 line = read_whole_line (fp);
609 /* Skip empty line. */
610 line = read_whole_line (fp);
614 /* Line loop to end of file: */
615 while ((line = read_whole_line (fp)))
618 i = clean_line (line);
625 /* First column: Name. A bit of black magic again. The name may be
626 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
627 line. Therefore we will first try to get the complete name
628 until the first space character; if it fails, we assume that the name
629 occupies the whole line. After that we search for the version
630 separator ";", we remove it and check the extension of the file;
631 extension .DIR denotes a directory. */
633 tok = strtok(line, " ");
634 if (tok == NULL) tok = line;
635 DEBUGP(("file name: '%s'\n", tok));
636 for (p = tok ; *p && *p != ';' ; p++);
637 if (*p == ';') *p = '\0';
638 p = tok + strlen(tok) - 4;
639 if (!strcmp(p, ".DIR")) *p = '\0';
640 cur.name = xstrdup(tok);
641 DEBUGP(("Name: '%s'\n", cur.name));
643 /* If the name ends in .DIR or .DIR;#, it's a directory. We also set
644 the file size to zero as the listing only tells us the size in
645 filesystem blocks - for an integrity check (when mirroring, for
646 example) we would need the size in bytes. */
650 cur.type = FT_DIRECTORY;
652 DEBUGP(("Directory\n"));
656 cur.type = FT_PLAINFILE;
662 /* Second column, if exists, or the first column of the next line
663 contain file size in blocks. We will skip it. */
665 tok = strtok(NULL, " ");
668 DEBUGP(("Getting additional line\n"));
670 line = read_whole_line (fp);
673 DEBUGP(("empty line read, leaving listing parser\n"));
676 i = clean_line (line);
679 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
683 tok = strtok(line, " ");
685 DEBUGP(("second token: '%s'\n", tok));
687 /* Third/Second column: Date DD-MMM-YYYY. */
689 tok = strtok(NULL, "-");
690 if (tok == NULL) continue;
691 DEBUGP(("day: '%s'\n",tok));
693 tok = strtok(NULL, "-");
696 /* If the server produces garbage like
697 'EA95_0PS.GZ;1 No privilege for attempted operation'
698 the first strtok(NULL, "-") will return everything until the end
699 of the line and only the next strtok() call will return NULL. */
700 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
704 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
705 /* Unknown months are mapped to January */
707 tok = strtok (NULL, " ");
708 if (tok == NULL) continue;
709 year = atoi (tok) - 1900;
710 DEBUGP(("date parsed\n"));
712 /* Fourth/Third column: Time hh:mm[:ss] */
713 tok = strtok (NULL, " ");
714 if (tok == NULL) continue;
715 hour = min = sec = 0;
718 for (; *p && *p != ':'; ++p);
721 for (; *p && *p != ':'; ++p);
725 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
726 year+1900, month, day, hour, min, sec));
728 /* Build the time-stamp (copy & paste from above) */
729 timestruct.tm_sec = sec;
730 timestruct.tm_min = min;
731 timestruct.tm_hour = hour;
732 timestruct.tm_mday = day;
733 timestruct.tm_mon = month;
734 timestruct.tm_year = year;
735 timestruct.tm_wday = 0;
736 timestruct.tm_yday = 0;
737 timestruct.tm_isdst = -1;
738 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
740 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
742 /* Skip the fifth column */
744 tok = strtok(NULL, " ");
745 if (tok == NULL) continue;
747 /* Sixth column: Permissions */
749 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissons */
750 if (tok == NULL) continue;
751 tok = strtok(NULL, ")");
754 DEBUGP(("confusing VMS permissions, skipping line\n"));
758 /* Permissions have the format "RWED,RWED,RE" */
759 cur.perms = vmsperms(tok);
760 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
764 /* And put everything into the linked list */
767 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
768 memcpy (l, &cur, sizeof (cur));
769 l->prev = l->next = NULL;
774 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
776 memcpy (l, &cur, sizeof (cur));
788 /* This function selects the correct parsing routine depending on
789 the SYSTEM_TYPE. The system type should be based on the result of the
790 "SYST" response of the FTP server. According to this response we will
791 use one of the three different listing parsers that cover most of the FTP
792 servers used nowadays. */
795 ftp_parse_ls (const char *file, const enum stype system_type)
800 return ftp_parse_unix_ls (file, 0);
803 /* Detect whether the listing is simulating the UNIX format */
806 fp = fopen (file, "rb");
809 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
814 /* If the first character of the file is '0'-'9', it's WINNT
816 if (c >= '0' && c <='9')
817 return ftp_parse_winnt_ls (file);
819 return ftp_parse_unix_ls (file, 1);
822 return ftp_parse_vms_ls (file);
824 return ftp_parse_unix_ls (file, 1);
826 logprintf (LOG_NOTQUIET, _("\
827 Unsupported listing type, trying Unix listing parser.\n"));
828 return ftp_parse_unix_ls (file, 0);
832 /* Stuff for creating FTP index. */
834 /* The function creates an HTML index containing references to given
835 directories and files on the appropriate host. The references are
838 ftp_index (const char *file, struct url *u, struct fileinfo *f)
842 char *htclfile; /* HTML-clean file name */
846 fp = fopen (file, "wb");
849 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
857 char *tmpu, *tmpp; /* temporary, clean user and passwd */
859 tmpu = url_escape (u->user);
860 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
861 upwd = (char *)xmalloc (strlen (tmpu)
862 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
863 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
869 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
870 fprintf (fp, "<html>\n<head>\n<title>");
871 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
872 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
873 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
874 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
880 /* #### Should we translate the months? Or, even better, use
882 static const char *months[] = {
883 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
884 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
886 struct tm *ptm = localtime ((time_t *)&f->tstamp);
888 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
891 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
896 fprintf (fp, _("time unknown "));
900 fprintf (fp, _("File "));
903 fprintf (fp, _("Directory "));
906 fprintf (fp, _("Link "));
909 fprintf (fp, _("Not sure "));
912 htclfile = html_quote_string (f->name);
913 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
916 fprintf (fp, "%s", u->dir);
919 fprintf (fp, "%s", htclfile);
920 if (f->type == FT_DIRECTORY)
922 fprintf (fp, "\">%s", htclfile);
923 if (f->type == FT_DIRECTORY)
925 fprintf (fp, "</a> ");
926 if (f->type == FT_PLAINFILE)
927 fprintf (fp, _(" (%s bytes)"), legible (f->size));
928 else if (f->type == FT_SYMLINK)
929 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
934 fprintf (fp, "</pre>\n</body>\n</html>\n");