1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
43 #include <sys/types.h>
51 extern FILE *output_stream;
53 /* Converts symbolic permissions to number-style ones, e.g. string
54 rwxr-xr-x to 755. For now, it knows nothing of
55 setuid/setgid/sticky. ACLs are ignored.

   S is read as three consecutive three-character groups (nine
   characters in all).  The Unix listing parser in this file calls it
   with the permission token advanced past its first character.

   NOTE(review): the declarations, the statement that combines the
   three groups and the return statement are not present in this copy
   of the file. */
57 symperms (const char *s)
/* One iteration per three-character group. */
63 for (i = 0; i < 3; i++, s += 3)
/* Within a group, 'r' in the first slot counts 4, 'w' in the second
   counts 2, and either 'x' or 's' in the third counts 1.  An 's' is
   therefore treated as plain execute permission; no other letter in
   the third slot is recognized. */
66 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
67 (s[2] == 'x' || s[2] == 's'));
73 /* Cleans a line of text so that it can be consistently parsed. Destroys
74 <CR> and <LF> in case that they occur at the end of the line and
75 replaces all <TAB> characters with <SPACE>. Returns the length of the
   cleaned line. */
78 clean_line(char *line)
80 int len = strlen (line);
/* Look at the last character for <LF>, then look at the last character
   again for <CR>.
   NOTE(review): the statements guarded by these two tests are not
   present in this copy of the file, and neither is any check that LEN
   is non-zero before LINE[LEN - 1] is read -- confirm both against the
   complete source. */
82 if (line[len - 1] == '\n')
84 if (line[len - 1] == '\r')
/* Overwrite every <TAB> with a <SPACE>.  The walk advances the LINE
   parameter itself, so LINE no longer points at the start of the
   string afterwards. */
86 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
90 /* Convert the Un*x-ish style directory listing stored in FILE to a
91 linked list of fileinfo (system-independent) entries. The contents
92 of FILE are considered to be produced by the standard Unix `ls -la'
93 output (whatever that might be). BSD (no group) and SYSV (with
94 group) listings are handled.
96 The time stamps are stored in a separate variable, time_t
97 compatible (I hope). The timezones are ignored.

   FILE names the listing file.  It is opened with fopen in "rb" mode
   and read one line at a time with read_whole_line; each line is
   passed through clean_line before it is tokenized with strtok.  A
   failure to open FILE is logged together with strerror (errno).

   IGNORE_PERMS is not referenced on any line present in this copy of
   the function; presumably it selects the "implicit perms" branch
   below -- confirm against the complete source.

   NOTE(review): several comments in this function had lost their
   closing lines, which left the statements following them commented
   out.  They are terminated below; "[...]" marks text that is missing
   from this copy.  */
98 static struct fileinfo *
99 ftp_parse_unix_ls (const char *file, int ignore_perms)
102 static const char *months[] = {
103 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
104 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
106 int next, len, i, error, ignore;
107 int year, month, day; /* for time analysis */
109 struct tm timestruct, *tnow;
112 char *line, *tok; /* tokenizer */
113 struct fileinfo *dir, *l, cur; /* list creation */
115 fp = fopen (file, "rb");
118 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
123 /* Line loop to end of file: */
124 while ((line = read_whole_line (fp)))
126 len = clean_line (line);
127 /* Skip if total... */
128 if (!strncasecmp (line, "total", 5))
133 /* Get the first token (permissions). */
134 tok = strtok (line, " ");
144 /* Decide whether we deal with a file or a directory. */
148 cur.type = FT_PLAINFILE;
149 DEBUGP (("PLAINFILE; "));
152 cur.type = FT_DIRECTORY;
153 DEBUGP (("DIRECTORY; "));
156 cur.type = FT_SYMLINK;
157 DEBUGP (("SYMLINK; "));
160 cur.type = FT_UNKNOWN;
161 DEBUGP (("UNKNOWN; "));
176 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
179 DEBUGP (("implicit perms %0o; ", cur.perms));
/* The first character of the token was used for the type above, so
   the permission letters start at TOK + 1. */
183 cur.perms = symperms (tok + 1);
184 DEBUGP (("perms %0o; ", cur.perms));
187 error = ignore = 0; /* Erroneous and ignoring entries are
188 treated equally for now. */
189 year = hour = min = sec = 0; /* Silence the compiler. */
192 /* While there are tokens on the line, parse them. Next is the
193 number of tokens left until the filename.
195 Use the month-name token as the "anchor" (the place where the
196 position wrt the file name is "known"). When a month name is
197 encountered, `next' is set to 5. Also, the preceding
198 characters are parsed to get the file size.
200 This tactic is quite dubious when it comes to
201 internationalization issues (non-English month names), but it
    [...] */
203 while ((tok = strtok (NULL, " ")))
206 if (next < 0) /* a month name was not encountered */
208 for (i = 0; i < 12; i++)
209 if (!strcmp (tok, months[i]))
211 /* If we got a month, it means the token before it is the
212 size, and the filename is three tokens away. */
/* Read the size backwards, least significant digit first, stopping at
   the first non-digit or at the start of the line. */
218 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
219 cur.size += mul * (*t - '0');
222 /* Something is seriously wrong. */
228 DEBUGP (("month: %s; ", months[month]));
231 else if (next == 4) /* days */
233 if (tok[1]) /* two-digit... */
234 day = 10 * (*tok - '0') + tok[1] - '0';
235 else /* ...or one-digit */
237 DEBUGP (("day: %d; ", day));
241 /* This ought to be either the time, or the year. Let's
    [...]
244 If we have a number x, it's a year. If we have x:y,
245 it's hours and minutes. If we have x:y:z, z are
    seconds. */
248 min = hour = sec = 0;
249 /* We must deal with digits. */
252 /* Suppose it's year. */
253 for (; ISDIGIT (*tok); tok++)
254 year = (*tok - '0') + 10 * year;
257 /* This means these were hours! */
261 /* Get the minutes... */
262 for (; ISDIGIT (*tok); tok++)
263 min = (*tok - '0') + 10 * min;
266 /* ...and the seconds. */
268 for (; ISDIGIT (*tok); tok++)
269 sec = (*tok - '0') + 10 * sec;
274 DEBUGP (("year: %d (no tm); ", year));
276 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
278 else if (next == 2) /* The file name */
283 /* Since the file name may contain a SPC, it is possible
284 for strtok to handle it wrong. */
/* LEN is the cleaned length of the whole line, so a token that ends
   before the end of the line was cut short at an embedded space. */
285 fnlen = strlen (tok);
286 if (fnlen < len - (tok - line))
288 /* So we have a SPC in the file name. Restore the [...] */
291 /* If the file is a symbolic link, it should have a " -> "
    separator in it. */
293 if (cur.type == FT_SYMLINK)
295 p = strstr (tok, " -> ");
/* The link target starts right after the four-character separator. */
301 cur.linkto = xstrdup (p + 4);
302 DEBUGP (("link to: %s\n", cur.linkto));
303 /* And separate it from the file name. */
307 /* If we have the filename, add it to the list of files or [...] */
309 /* "." and ".." are an exception! */
310 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
312 DEBUGP (("\nIgnoring `.' and `..'; "));
316 /* Some FTP sites choose to have ls -F as their default
317 LIST output, which marks the symlinks with a trailing
318 `@', directory names with a trailing `/' and
319 executables with a trailing `*'. This is no problem
320 unless encountering a symbolic link ending with `@',
321 or an executable ending with `*' on a server without
322 default -F output. I believe these cases are very
    [...] */
324 fnlen = strlen (tok); /* re-calculate `fnlen' */
325 cur.name = (char *)xmalloc (fnlen + 1);
326 memcpy (cur.name, tok, fnlen + 1);
329 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
331 cur.name[fnlen - 1] = '\0';
332 DEBUGP (("trailing `/' on dir.\n"));
334 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
336 cur.name[fnlen - 1] = '\0';
337 DEBUGP (("trailing `@' on link.\n"));
339 else if (cur.type == FT_PLAINFILE
340 && (cur.perms & 0111)
341 && cur.name[fnlen - 1] == '*')
343 cur.name[fnlen - 1] = '\0';
344 DEBUGP (("trailing `*' on exec.\n"));
/* An entry without a name, or a symlink without a target, is dropped
   and whatever was allocated for it is released. */
355 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
362 DEBUGP (("Skipping.\n"));
363 xfree_null (cur.name);
364 xfree_null (cur.linkto);
/* First entry: it becomes both the head (DIR) and the tail (L). */
371 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
372 memcpy (l, &cur, sizeof (cur));
373 l->prev = l->next = NULL;
/* Later entries are allocated behind the current tail. */
378 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
380 memcpy (l, &cur, sizeof (cur));
383 /* Get the current time. */
384 timenow = time (NULL);
385 tnow = localtime (&timenow);
386 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
387 timestruct.tm_sec = sec;
388 timestruct.tm_min = min;
389 timestruct.tm_hour = hour;
390 timestruct.tm_mday = day;
391 timestruct.tm_mon = month;
394 /* Some listings will not specify the year if it is "obvious"
395 that the file was from the previous year. E.g. if today
396 is 97-01-12, and you see a file of Dec 15th, its year is
397 1996, not 1997. Thanks to Vladimir Volovich for
    [...] */
399 if (month > tnow->tm_mon)
400 timestruct.tm_year = tnow->tm_year - 1;
402 timestruct.tm_year = tnow->tm_year;
405 timestruct.tm_year = year;
/* tm_year counts from 1900; a value of 1900 or more is taken to be a
   full calendar year and is rebased. */
406 if (timestruct.tm_year >= 1900)
407 timestruct.tm_year -= 1900;
408 timestruct.tm_wday = 0;
409 timestruct.tm_yday = 0;
/* Negative tm_isdst lets mktime decide whether DST is in effect. */
410 timestruct.tm_isdst = -1;
411 l->tstamp = mktime (&timestruct); /* store the time-stamp */
/* Convert the Windows NT style listing stored in FILE to a linked
   list of fileinfo entries.  Each line is expected to hold, in order,
   a date (mm-dd-yy), a time (hh:mm followed by AM or PM), either a
   file length or <DIR>, and the name at a fixed column.  Lines whose
   cleaned length is under 40 characters are skipped.

   FILE is opened with fopen in "rb" mode; a failure to open it is
   logged together with strerror (errno).

   NOTE(review): on the lines present in this copy, results of strtok
   are handed to atoi or dereferenced without a NULL check, and the
   short-line `continue' does not release LINE -- confirm both against
   the complete source.  Comments that had lost their closing lines
   are terminated below; "[...]" marks text that is missing.  */
420 static struct fileinfo *
421 ftp_parse_winnt_ls (const char *file)
425 int year, month, day; /* for time analysis */
427 struct tm timestruct;
429 char *line, *tok; /* tokenizer */
430 struct fileinfo *dir, *l, cur; /* list creation */
432 fp = fopen (file, "rb");
435 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
440 /* Line loop to end of file: */
441 while ((line = read_whole_line (fp)))
443 len = clean_line (line);
445 /* Extracting name is a bit of black magic and we have to do it
446 before `strtok' inserted extra \0 characters in the line
447 string. For the moment let us just suppose that the name starts at
448 column 39 of the listing. This way we could also recognize
449 filenames that begin with a series of space characters (but who
450 really wants to use such filenames anyway?). */
451 if (len < 40) continue;
453 cur.name = xstrdup(tok);
454 DEBUGP(("Name: '%s'\n", cur.name));
456 /* First column: mm-dd-yy. Should atoi() on the month fail, january
    is assumed. */
458 tok = strtok(line, "-");
/* tm_mon is zero-based, hence the - 1. */
459 month = atoi(tok) - 1;
460 if (month < 0) month = 0;
461 tok = strtok(NULL, "-");
463 tok = strtok(NULL, " ");
465 /* Assuming the epoch starting at 1.1.1970 */
/* A two-digit year of 70 or less is moved into the next century. */
466 if (year <= 70) year += 100;
468 /* Second column: hh:mm[AP]M, listing does not contain value for
    seconds. */
470 tok = strtok(NULL, ":");
/* Splitting on 'M' leaves the minutes followed by 'A' or 'P'. */
472 tok = strtok(NULL, "M");
474 /* Adjust hour from AM/PM. Just for the record, the sequence goes
475 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
477 if (hour == 12) hour = 0;
478 if (*tok == 'P') hour += 12;
480 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
481 year+1900, month, day, hour, min));
483 /* Build the time-stamp (copy & paste from above) */
484 timestruct.tm_sec = 0;
485 timestruct.tm_min = min;
486 timestruct.tm_hour = hour;
487 timestruct.tm_mday = day;
488 timestruct.tm_mon = month;
489 timestruct.tm_year = year;
490 timestruct.tm_wday = 0;
491 timestruct.tm_yday = 0;
/* Negative tm_isdst lets mktime decide whether DST is in effect. */
492 timestruct.tm_isdst = -1;
493 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
495 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
497 /* Third column: Either file length, or <DIR>. We also set the
498 permissions (guessed as 0644 for plain files and 0755 for
499 directories as the listing does not give us a clue) and filetype
    [...] */
501 tok = strtok(NULL, " ");
502 while (*tok == '\0') tok = strtok(NULL, " ");
505 cur.type = FT_DIRECTORY;
508 DEBUGP(("Directory\n"));
512 cur.type = FT_PLAINFILE;
513 cur.size = atoi(tok);
515 DEBUGP(("File, size %ld bytes\n", cur.size));
520 /* And put everything into the linked list */
/* First entry: it becomes both the head (DIR) and the tail (L). */
523 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
524 memcpy (l, &cur, sizeof (cur));
525 l->prev = l->next = NULL;
/* Later entries are allocated behind the current tail. */
530 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
532 memcpy (l, &cur, sizeof (cur));
543 /* Converts VMS symbolic permissions to number-style ones, e.g. string
544 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
545 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>.

   NOTE(review): the declarations, the loop and switch statement that
   drive the cases below, and the return statement are not present in
   this copy of the file. */
547 vmsperms (const char *s)
/* A comma ends one permission group: shift what has been collected so
   far up by three bits to make room for the next group. */
554 case ',': perms <<= 3; break;
/* Permission letters OR their bit into the lowest three bits; 'W' and
   'D' share the same bit. */
555 case 'R': perms |= 4; break;
556 case 'W': perms |= 2; break;
557 case 'D': perms |= 2; break;
558 case 'E': perms |= 1; break;
/* Any other character produces a debug message on this line. */
559 default: DEBUGP(("wrong VMS permissons!\n"));
/* Convert the VMS style listing stored in FILE to a linked list of
   fileinfo entries.  Three leading lines of the listing are read and
   skipped.  Each entry then supplies, in order, a name, a size in
   blocks (skipped), a date (DD-MMM-YYYY), a time (hh:mm[:ss]), a fifth
   column (skipped) and the permissions; an entry may continue on a
   second line after the name.

   FILE is opened with fopen in "rb" mode; a failure to open it is
   logged together with strerror (errno).

   NOTE(review): on the lines present in this copy, some results of
   strtok are handed to atoi or strcmp without a visible NULL check --
   confirm against the complete source.  */
567 static struct fileinfo *
568 ftp_parse_vms_ls (const char *file)
571 /* #### A third copy of more-or-less the same array ? */
572 static const char *months[] = {
573 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
574 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
577 int year, month, day; /* for time analysis */
579 struct tm timestruct;
581 char *line, *tok; /* tokenizer */
582 struct fileinfo *dir, *l, cur; /* list creation */
584 fp = fopen (file, "rb");
587 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
592 /* Skip empty line. */
593 line = read_whole_line (fp);
597 /* Skip "Directory PUB$DEVICE[PUB]" */
598 line = read_whole_line (fp);
602 /* Skip empty line. */
603 line = read_whole_line (fp);
607 /* Line loop to end of file: */
608 while ((line = read_whole_line (fp)))
611 i = clean_line (line);
618 /* First column: Name. A bit of black magic again. The name may be
619 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
620 line. Therefore we will first try to get the complete name
621 until the first space character; if it fails, we assume that the name
622 occupies the whole line. After that we search for the version
623 separator ";", we remove it and check the extension of the file;
624 extension .DIR denotes directory. */
626 tok = strtok(line, " ");
627 if (tok == NULL) tok = line;
628 DEBUGP(("file name: '%s'\n", tok));
/* Cut the name at the version separator, if there is one. */
629 for (p = tok ; *p && *p != ';' ; p++);
630 if (*p == ';') *p = '\0';
/* P is aimed at the last four characters so that a ".DIR" suffix can
   be detected and removed.
   NOTE(review): for a name shorter than four characters P ends up
   before the start of TOK; no length check is visible in this copy. */
631 p = tok + strlen(tok) - 4;
632 if (!strcmp(p, ".DIR")) *p = '\0';
633 cur.name = xstrdup(tok);
634 DEBUGP(("Name: '%s'\n", cur.name));
636 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
637 the file size to zero as the listing does tell us only the size in
638 filesystem blocks - for an integrity check (when mirroring, for
639 example) we would need the size in bytes. */
643 cur.type = FT_DIRECTORY;
645 DEBUGP(("Directory\n"));
649 cur.type = FT_PLAINFILE;
655 /* Second column, if exists, or the first column of the next line
656 contain file size in blocks. We will skip it. */
658 tok = strtok(NULL, " ");
661 DEBUGP(("Getting additional line\n"));
663 line = read_whole_line (fp);
666 DEBUGP(("empty line read, leaving listing parser\n"));
669 i = clean_line (line);
672 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
676 tok = strtok(line, " ");
678 DEBUGP(("second token: '%s'\n", tok));
680 /* Third/Second column: Date DD-MMM-YYYY. */
682 tok = strtok(NULL, "-");
683 DEBUGP(("day: '%s'\n",tok));
685 tok = strtok(NULL, "-");
688 /* If the server produces garbage like
689 'EA95_0PS.GZ;1 No privilege for attempted operation'
690 the first strtok(NULL, "-") will return everything until the end
691 of the line and only the next strtok() call will return NULL. */
692 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
/* After this loop I is the index of the matching month, or 12 when
   nothing matched. */
696 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
697 /* Unknown months are mapped to January */
699 tok = strtok (NULL, " ");
/* tm_year counts from 1900. */
700 year = atoi (tok) - 1900;
701 DEBUGP(("date parsed\n"));
703 /* Fourth/Third column: Time hh:mm[:ss] */
704 tok = strtok (NULL, " ");
705 hour = min = sec = 0;
/* Each of the two loops below advances P to the next ':' or to the
   end of the token. */
708 for (; *p && *p != ':'; ++p);
711 for (; *p && *p != ':'; ++p);
715 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
716 year+1900, month, day, hour, min, sec));
718 /* Build the time-stamp (copy & paste from above) */
719 timestruct.tm_sec = sec;
720 timestruct.tm_min = min;
721 timestruct.tm_hour = hour;
722 timestruct.tm_mday = day;
723 timestruct.tm_mon = month;
724 timestruct.tm_year = year;
725 timestruct.tm_wday = 0;
726 timestruct.tm_yday = 0;
/* Negative tm_isdst lets mktime decide whether DST is in effect. */
727 timestruct.tm_isdst = -1;
728 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
730 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
732 /* Skip the fifth column */
734 tok = strtok(NULL, " ");
736 /* Sixth column: Permissions */
738 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
/* Everything from here up to the closing parenthesis is the part that
   gets converted. */
739 tok = strtok(NULL, ")");
742 DEBUGP(("confusing VMS permissions, skipping line\n"));
746 /* Permissions have the format "RWED,RWED,RE" */
747 cur.perms = vmsperms(tok);
748 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
752 /* And put everything into the linked list */
/* First entry: it becomes both the head (DIR) and the tail (L). */
755 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
756 memcpy (l, &cur, sizeof (cur));
757 l->prev = l->next = NULL;
/* Later entries are allocated behind the current tail. */
762 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
764 memcpy (l, &cur, sizeof (cur));
776 /* This function switches between the correct parsing routine depending on
777 the SYSTEM_TYPE. The system type should be based on the result of the
778 "SYST" response of the FTP server. According to this response we will
779 use one of the three different listing parsers that cover the most of FTP
780 servers used nowadays.

   FILE is the name of the listing file and is handed on unchanged to
   the parser that is chosen.  For one system type the file is opened
   first and its first character decides between the WINNT parser and
   the Unix parser.  An unsupported type is reported through logprintf
   and handled by the Unix parser.

   NOTE(review): the switch statement and its case labels are not
   present in this copy of the file, so the system type each return
   statement below belongs to cannot be read off here. */
783 ftp_parse_ls (const char *file, const enum stype system_type)
788 return ftp_parse_unix_ls (file, 0);
791 /* Detect whether the listing is simulating the UNIX format */
794 fp = fopen (file, "rb");
797 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
802 /* If the first character of the file is '0'-'9', it's WINNT
    format. */
804 if (c >= '0' && c <='9')
805 return ftp_parse_winnt_ls (file);
/* Otherwise parse it as a Unix listing, passing 1 for ignore_perms. */
807 return ftp_parse_unix_ls (file, 1);
810 return ftp_parse_vms_ls (file);
812 return ftp_parse_unix_ls (file, 1);
814 logprintf (LOG_NOTQUIET, _("\
815 Unsupported listing type, trying Unix listing parser.\n"));
816 return ftp_parse_unix_ls (file, 0);
820 /* Stuff for creating FTP index. */
822 /* The function creates an HTML index containing references to given
823 directories and files on the appropriate host. The references are
    ftp:// URLs.

   FILE is the name of the index to write; it is opened with fopen in
   "wb" mode and a failure to open it is logged together with
   strerror (errno).  U supplies the user, password, host, port and
   directory used in the page title and in every link.  F supplies the
   entries (time stamp, type, name, size, link target).

   NOTE(review): the statements that iterate over F, the conditions
   around several of the fprintf calls, the cleanup and the return are
   not present in this copy of the file.  Comments that had lost their
   closing lines are terminated below; "[...]" marks text that is
   missing.  */
826 ftp_index (const char *file, struct url *u, struct fileinfo *f)
830 char *htclfile; /* HTML-clean file name */
834 fp = fopen (file, "wb");
837 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
845 char *tmpu, *tmpp; /* temporary, clean user and passwd */
847 tmpu = url_escape (u->user);
848 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
/* Room for the user, an optional ':' plus password, the '@' and the
   terminating NUL. */
849 upwd = (char *)xmalloc (strlen (tmpu)
850 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
851 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
/* NOTE(review): u->dir and u->host are written into the page as they
   are, unlike the file name below which goes through
   html_quote_string -- confirm they cannot contain HTML-special
   characters. */
857 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
858 fprintf (fp, "<html>\n<head>\n<title>");
859 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
860 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
861 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
862 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
868 /* #### Should we translate the months? Or, even better, use
    [...] */
870 static char *months[] = {
871 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
872 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
874 struct tm *ptm = localtime ((time_t *)&f->tstamp);
876 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
879 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
884 fprintf (fp, _("time unknown "));
888 fprintf (fp, _("File "));
891 fprintf (fp, _("Directory "));
894 fprintf (fp, _("Link "));
897 fprintf (fp, _("Not sure "));
900 htclfile = html_quote_string (f->name);
/* NOTE(review): u->port is printed with %d in the title above and with
   %hu here; only one of the two can match its declared type. */
901 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
904 fprintf (fp, "%s", u->dir);
907 fprintf (fp, "%s", htclfile);
908 if (f->type == FT_DIRECTORY)
910 fprintf (fp, "\">%s", htclfile);
911 if (f->type == FT_DIRECTORY)
913 fprintf (fp, "</a> ");
914 if (f->type == FT_PLAINFILE)
915 fprintf (fp, _(" (%s bytes)"), legible (f->size));
916 else if (f->type == FT_SYMLINK)
/* NOTE(review): the link target is written without HTML quoting. */
917 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
922 fprintf (fp, "</pre>\n</body>\n</html>\n");