1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
43 #include <sys/types.h>
/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.

   S points at the nine permission characters of an `ls -l' mode
   string (the caller skips the leading file-type character).  Each
   group of three characters yields one octal digit.

   setuid/setgid/sticky bits themselves are not represented in the
   result, but the lower-case markers `s' and `t' also imply that the
   execute bit is set, so they are counted as `x'.  The upper-case
   `S' and `T' (special bit without execute) contribute nothing.
   ACLs are ignored.

   Returns the permission bits (0..0777), or 0 if S is shorter than
   nine characters.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  /* Never index past the end of a truncated mode string.  */
  if (strlen (s) < 9)
    return 0;
  for (i = 0; i < 3; i++, s += 3)
    {
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's' || s[2] == 't'));
    }
  return perms;
}
/* Cleans a line of text so that it can be consistently parsed.
   Destroys <CR> and <LF> in case that they occur at the end of the
   line and replaces all <TAB> characters with <SPACE>.

   LINE is modified in place.  Returns the length of the modified
   line.  An empty line, or a line consisting only of the line
   terminator, yields 0.  */
static int
clean_line (char *line)
{
  int len = strlen (line);

  /* Both checks are guarded with len > 0: once the LF of a bare "\n"
     has been stripped the string is empty, and looking at
     line[len - 1] would touch the byte in front of the buffer.  */
  if (len > 0 && line[len - 1] == '\n')
    line[--len] = '\0';
  if (len > 0 && line[len - 1] == '\r')
    line[--len] = '\0';
  for (; *line; line++)
    if (*line == '\t')
      *line = ' ';
  return len;
}
88 /* Convert the Un*x-ish style directory listing stored in FILE to a
89 linked list of fileinfo (system-independent) entries. The contents
90 of FILE are considered to be produced by the standard Unix `ls -la'
91 output (whatever that might be). BSD (no group) and SYSV (with
92 group) listings are handled.
94 The time stamps are stored in a separate variable, time_t
95 compatible (I hope). The timezones are ignored. */
/* Review summary of the logic visible below.

   FILE is opened with fopen (file, "rb") and consumed one line at a
   time through read_whole_line(); every line is normalized with
   clean_line() and tokenized on spaces with strtok().  A line that
   begins with "total" (compared case-insensitively) is special-cased.

   The first token is the mode string: cur.type is set to one of
   FT_PLAINFILE, FT_DIRECTORY, FT_SYMLINK or FT_UNKNOWN, and cur.perms
   is taken either from symperms (tok + 1) or from the "implicit
   perms" path.  Presumably IGNORE_PERMS selects the implicit path;
   the test itself is not visible in this view -- confirm.

   The remaining tokens are walked with `next' counting down from the
   month name: next == 4 is the day, the following token is a year or
   hh:mm[:ss], and next == 2 is the file name (with " -> target" for
   symlinks and optional `ls -F' suffixes stripped).

   Accepted entries are copied into a freshly xmalloc'ed node that is
   linked through prev/next, and the node's tstamp is produced by
   mktime().  When the listing gave no year, the current year is used
   unless the month lies after the current month, in which case the
   previous year is used; a year value of 1900 or more is reduced by
   1900 before being stored in tm_year.

   NOTE(review): this copy of the function has gaps (the embedded
   original line numbers skip), several multi-line comments are cut
   off before their closing delimiter, and the mktime() argument on
   original line 409 reads `×truct', which looks like a mis-encoded
   `&timestruct' (HTML entity for "&times").  Restore from a pristine
   copy before building.  */
96 static struct fileinfo *
97 ftp_parse_unix_ls (const char *file, int ignore_perms)
100 static const char *months[] = {
101 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
102 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
104 int next, len, i, error, ignore;
105 int year, month, day; /* for time analysis */
107 struct tm timestruct, *tnow;
110 char *line, *tok; /* tokenizer */
111 struct fileinfo *dir, *l, cur; /* list creation */
113 fp = fopen (file, "rb");
116 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
121 /* Line loop to end of file: */
122 while ((line = read_whole_line (fp)))
124 len = clean_line (line);
125 /* Skip if total... */
126 if (!strncasecmp (line, "total", 5))
131 /* Get the first token (permissions). */
132 tok = strtok (line, " ");
142 /* Decide whether we deal with a file or a directory. */
146 cur.type = FT_PLAINFILE;
147 DEBUGP (("PLAINFILE; "));
150 cur.type = FT_DIRECTORY;
151 DEBUGP (("DIRECTORY; "));
154 cur.type = FT_SYMLINK;
155 DEBUGP (("SYMLINK; "));
158 cur.type = FT_UNKNOWN;
159 DEBUGP (("UNKNOWN; "));
174 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
177 DEBUGP (("implicit perms %0o; ", cur.perms));
/* tok + 1 skips the file-type character that leads the mode string.  */
181 cur.perms = symperms (tok + 1);
182 DEBUGP (("perms %0o; ", cur.perms));
185 error = ignore = 0; /* Erroneous and ignoring entries are
186 treated equally for now. */
187 year = hour = min = sec = 0; /* Silence the compiler. */
190 /* While there are tokens on the line, parse them. Next is the
191 number of tokens left until the filename.
193 Use the month-name token as the "anchor" (the place where the
194 position wrt the file name is "known"). When a month name is
195 encountered, `next' is set to 5. Also, the preceding
196 characters are parsed to get the file size.
198 This tactic is quite dubious when it comes to
199 internationalization issues (non-English month names), but it
201 while ((tok = strtok (NULL, " ")))
204 if (next < 0) /* a month name was not encountered */
206 for (i = 0; i < 12; i++)
207 if (!strcmp (tok, months[i]))
209 /* If we got a month, it means the token before it is the
210 size, and the filename is three tokens away. */
/* The size is accumulated right-to-left: T walks backwards over the
   digits while MUL supplies the decimal weight of each one.  */
216 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
217 cur.size += mul * (*t - '0');
220 /* Something is seriously wrong. */
226 DEBUGP (("month: %s; ", months[month]));
229 else if (next == 4) /* days */
231 if (tok[1]) /* two-digit... */
232 day = 10 * (*tok - '0') + tok[1] - '0';
233 else /* ...or one-digit */
235 DEBUGP (("day: %d; ", day));
239 /* This ought to be either the time, or the year. Let's
242 If we have a number x, it's a year. If we have x:y,
243 it's hours and minutes. If we have x:y:z, z are
246 min = hour = sec = 0;
247 /* We must deal with digits. */
250 /* Suppose it's year. */
251 for (; ISDIGIT (*tok); tok++)
252 year = (*tok - '0') + 10 * year;
255 /* This means these were hours! */
259 /* Get the minutes... */
260 for (; ISDIGIT (*tok); tok++)
261 min = (*tok - '0') + 10 * min;
264 /* ...and the seconds. */
266 for (; ISDIGIT (*tok); tok++)
267 sec = (*tok - '0') + 10 * sec;
272 DEBUGP (("year: %d (no tm); ", year));
274 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
276 else if (next == 2) /* The file name */
281 /* Since the file name may contain a SPC, it is possible
282 for strtok to handle it wrong. */
283 fnlen = strlen (tok);
284 if (fnlen < len - (tok - line))
286 /* So we have a SPC in the file name. Restore the
289 /* If the file is a symbolic link, it should have a
291 if (cur.type == FT_SYMLINK)
293 p = strstr (tok, " -> ");
299 cur.linkto = xstrdup (p + 4);
300 DEBUGP (("link to: %s\n", cur.linkto));
301 /* And separate it from the file name. */
305 /* If we have the filename, add it to the list of files or
307 /* "." and ".." are an exception! */
308 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
310 DEBUGP (("\nIgnoring `.' and `..'; "));
314 /* Some FTP sites choose to have ls -F as their default
315 LIST output, which marks the symlinks with a trailing
316 `@', directory names with a trailing `/' and
317 executables with a trailing `*'. This is no problem
318 unless encountering a symbolic link ending with `@',
319 or an executable ending with `*' on a server without
320 default -F output. I believe these cases are very
322 fnlen = strlen (tok); /* re-calculate `fnlen' */
323 cur.name = (char *)xmalloc (fnlen + 1);
324 memcpy (cur.name, tok, fnlen + 1);
/* NOTE(review): the fnlen - 1 indexing below is only valid for a
   non-empty name; the guard is not visible in this view -- confirm.  */
327 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
329 cur.name[fnlen - 1] = '\0';
330 DEBUGP (("trailing `/' on dir.\n"));
332 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
334 cur.name[fnlen - 1] = '\0';
335 DEBUGP (("trailing `@' on link.\n"));
337 else if (cur.type == FT_PLAINFILE
338 && (cur.perms & 0111)
339 && cur.name[fnlen - 1] == '*')
341 cur.name[fnlen - 1] = '\0';
342 DEBUGP (("trailing `*' on exec.\n"));
/* An entry that ended up without a name, or a symlink without a
   target, cannot be used; the "Skipping." path releases whatever was
   allocated for it.  */
353 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
360 DEBUGP (("Skipping.\n"));
361 FREE_MAYBE (cur.name);
362 FREE_MAYBE (cur.linkto);
/* Append a copy of CUR to the list; the first node becomes DIR.  */
369 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
370 memcpy (l, &cur, sizeof (cur));
371 l->prev = l->next = NULL;
376 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
378 memcpy (l, &cur, sizeof (cur));
381 /* Get the current time. */
382 timenow = time (NULL);
383 tnow = localtime (&timenow);
384 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
385 timestruct.tm_sec = sec;
386 timestruct.tm_min = min;
387 timestruct.tm_hour = hour;
388 timestruct.tm_mday = day;
389 timestruct.tm_mon = month;
392 /* Some listings will not specify the year if it is "obvious"
393 that the file was from the previous year. E.g. if today
394 is 97-01-12, and you see a file of Dec 15th, its year is
395 1996, not 1997. Thanks to Vladimir Volovich for
397 if (month > tnow->tm_mon)
398 timestruct.tm_year = tnow->tm_year - 1;
400 timestruct.tm_year = tnow->tm_year;
403 timestruct.tm_year = year;
404 if (timestruct.tm_year >= 1900)
405 timestruct.tm_year -= 1900;
406 timestruct.tm_wday = 0;
407 timestruct.tm_yday = 0;
408 timestruct.tm_isdst = -1;
409 l->tstamp = mktime (×truct); /* store the time-stamp */
/* Convert the Windows NT (DOS-like) directory listing stored in FILE
   to a linked list of fileinfo entries.

   Each line is expected to carry, in this order: the date as
   mm-dd-yy, the time as hh:mm[AP]M, then either the file length or
   <DIR>, and finally the name, which is assumed to start at column 39
   (lines shorter than 40 characters are skipped).  Two-digit years of
   70 or less are taken to be 20xx.  The listing carries no
   permissions, so they are guessed (0755 for directories, 0644 for
   plain files, per the comment in the body).

   FILE is opened with fopen (file, "rb"); if that fails the error is
   logged.  Entries are appended to a list linked through prev/next.

   NOTE(review): none of the strtok() results visible below is checked
   for NULL before being passed to atoi() or dereferenced, so a
   malformed line would presumably crash the parser -- confirm.
   NOTE(review): this copy has gaps, and the mktime() argument on
   original line 491 reads `×truct', which looks like a mis-encoded
   `&timestruct'.  Restore from a pristine copy before building.  */
418 static struct fileinfo *
419 ftp_parse_winnt_ls (const char *file)
423 int year, month, day; /* for time analysis */
425 struct tm timestruct;
427 char *line, *tok; /* tokenizer */
428 struct fileinfo *dir, *l, cur; /* list creation */
430 fp = fopen (file, "rb");
433 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
438 /* Line loop to end of file: */
439 while ((line = read_whole_line (fp)))
441 len = clean_line (line);
443 /* Extracting name is a bit of black magic and we have to do it
444 before `strtok' inserted extra \0 characters in the line
445 string. For the moment let us just suppose that the name starts at
446 column 39 of the listing. This way we could also recognize
447 filenames that begin with a series of space characters (but who
448 really wants to use such filenames anyway?). */
/* NOTE(review): this early `continue' does not release LINE on the
   path visible here -- confirm against the complete loop body.  */
449 if (len < 40) continue;
451 cur.name = xstrdup(tok);
452 DEBUGP(("Name: '%s'\n", cur.name));
454 /* First column: mm-dd-yy. Should atoi() on the month fail, january
456 tok = strtok(line, "-");
457 month = atoi(tok) - 1;
458 if (month < 0) month = 0;
459 tok = strtok(NULL, "-");
461 tok = strtok(NULL, " ");
463 /* Assuming the epoch starting at 1.1.1970 */
464 if (year <= 70) year += 100;
466 /* Second column: hh:mm[AP]M, listing does not contain value for
468 tok = strtok(NULL, ":");
470 tok = strtok(NULL, "M");
472 /* Adjust hour from AM/PM. Just for the record, the sequence goes
473 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
/* 12 o'clock is folded to 0 first so that adding 12 for PM yields
   12:xx for noon and leaves 00:xx for midnight.  */
475 if (hour == 12) hour = 0;
476 if (*tok == 'P') hour += 12;
478 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
479 year+1900, month, day, hour, min));
481 /* Build the time-stamp (copy & paste from above) */
482 timestruct.tm_sec = 0;
483 timestruct.tm_min = min;
484 timestruct.tm_hour = hour;
485 timestruct.tm_mday = day;
486 timestruct.tm_mon = month;
487 timestruct.tm_year = year;
488 timestruct.tm_wday = 0;
489 timestruct.tm_yday = 0;
490 timestruct.tm_isdst = -1;
491 cur.tstamp = mktime (×truct); /* store the time-stamp */
493 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
495 /* Third column: Either file length, or <DIR>. We also set the
496 permissions (guessed as 0644 for plain files and 0755 for
497 directories as the listing does not give us a clue) and filetype
499 tok = strtok(NULL, " ");
500 while (*tok == '\0') tok = strtok(NULL, " ");
503 cur.type = FT_DIRECTORY;
506 DEBUGP(("Directory\n"));
510 cur.type = FT_PLAINFILE;
511 cur.size = atoi(tok);
513 DEBUGP(("File, size %ld bytes\n", cur.size));
518 /* And put everything into the linked list */
521 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
522 memcpy (l, &cur, sizeof (cur));
523 l->prev = l->next = NULL;
528 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
530 memcpy (l, &cur, sizeof (cur));
/* Converts VMS symbolic permissions to number-style ones, e.g. string
   RWED,RE,RE to 755.  "D" (delete) is taken to be equal to "W"
   (write).  Inspired by a patch of Stoyan Lekov <lekov@eda.bg>.

   S is a comma-separated list of permission groups.  Every comma
   shifts the bits collected so far up by one octal digit, so the
   last group ends up in the lowest digit.  Characters other than
   `R', `W', `E', `D' and `,' are reported through DEBUGP and
   otherwise ignored.

   Returns the accumulated permission bits; an empty string yields 0.  */
static int
vmsperms (const char *s)
{
  int perms = 0;

  /* The terminator is tested before every character is examined, so
     an empty string is never read past its NUL.  */
  for (; *s; s++)
    {
      switch (*s)
        {
        case ',': perms <<= 3; break;
        case 'R': perms |= 4; break;
        case 'W': perms |= 2; break;
        case 'D': perms |= 2; break;
        case 'E': perms |= 1; break;
        default:
          DEBUGP(("wrong VMS permissons!\n"));
          break;
        }
    }

  return perms;
}
/* Convert the VMS directory listing stored in FILE to a linked list
   of fileinfo entries.

   Three leading lines (an empty line, the "Directory ..." banner and
   another empty line) are read and discarded.  Every following entry
   consists of: the name (NAME.EXT or NAME.EXT;VERSION, possibly alone
   on its own line), the size in blocks (skipped), the date as
   DD-MMM-YYYY, the time as hh:mm[:ss], one more column that is
   skipped, and the permissions in parentheses.  The version suffix
   and a trailing ".DIR" are removed from the name.  cur.perms comes
   from vmsperms() applied to the text after the first
   (SYSTEM) permission group.

   FILE is opened with fopen (file, "rb"); if that fails the error is
   logged.  Entries are appended to a list linked through prev/next.

   NOTE(review): this copy has gaps (the embedded original line
   numbers skip), and the mktime() argument on original line 726 reads
   `×truct', which looks like a mis-encoded `&timestruct'.  Restore
   from a pristine copy before building.  */
565 static struct fileinfo *
566 ftp_parse_vms_ls (const char *file)
569 /* #### A third copy of more-or-less the same array ? */
570 static const char *months[] = {
571 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
572 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
575 int year, month, day; /* for time analysis */
577 struct tm timestruct;
579 char *line, *tok; /* tokenizer */
580 struct fileinfo *dir, *l, cur; /* list creation */
582 fp = fopen (file, "rb");
585 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
590 /* Skip empty line. */
591 line = read_whole_line (fp);
595 /* Skip "Directory PUB$DEVICE[PUB]" */
596 line = read_whole_line (fp);
600 /* Skip empty line. */
601 line = read_whole_line (fp);
605 /* Line loop to end of file: */
606 while ((line = read_whole_line (fp)))
609 i = clean_line (line);
616 /* First column: Name. A bit of black magic again. The name may be
617 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
618 line. Therefore we will first try to get the complete name
619 until the first space character; if it fails, we assume that the name
620 occupies the whole line. After that we search for the version
621 separator ";", we remove it and check the extension of the file;
622 extension .DIR denotes directory. */
624 tok = strtok(line, " ");
625 if (tok == NULL) tok = line;
626 DEBUGP(("file name: '%s'\n", tok));
627 for (p = tok ; *p && *p != ';' ; p++);
628 if (*p == ';') *p = '\0';
/* NOTE(review): for a name shorter than four characters this makes P
   point in front of TOK, and the strcmp() below then reads from
   there -- confirm whether such names can occur.  */
629 p = tok + strlen(tok) - 4;
630 if (!strcmp(p, ".DIR")) *p = '\0';
631 cur.name = xstrdup(tok);
632 DEBUGP(("Name: '%s'\n", cur.name));
634 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
635 the file size to zero as the listing does tell us only the size in
636 filesystem blocks - for an integrity check (when mirroring, for
637 example) we would need the size in bytes. */
641 cur.type = FT_DIRECTORY;
643 DEBUGP(("Directory\n"));
647 cur.type = FT_PLAINFILE;
653 /* Second column, if exists, or the first column of the next line
654 contain file size in blocks. We will skip it. */
656 tok = strtok(NULL, " ");
659 DEBUGP(("Getting additional line\n"));
661 line = read_whole_line (fp);
664 DEBUGP(("empty line read, leaving listing parser\n"));
667 i = clean_line (line);
670 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
674 tok = strtok(line, " ");
676 DEBUGP(("second token: '%s'\n", tok));
678 /* Third/Second column: Date DD-MMM-YYYY. */
/* NOTE(review): the result of this strtok() is handed to DEBUGP
   without a NULL check on the lines visible here -- confirm.  */
680 tok = strtok(NULL, "-");
681 DEBUGP(("day: '%s'\n",tok));
683 tok = strtok(NULL, "-");
686 /* If the server produces garbage like
687 'EA95_0PS.GZ;1 No privilege for attempted operation'
688 the first strtok(NULL, "-") will return everything until the end
689 of the line and only the next strtok() call will return NULL. */
690 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
/* Linear search for the month name; I stays at 12 when nothing
   matches.  */
694 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
695 /* Unknown months are mapped to January */
/* NOTE(review): the year token is passed to atoi() without a NULL
   check -- confirm that a truncated date cannot reach this point.  */
697 tok = strtok (NULL, " ");
698 year = atoi (tok) - 1900;
699 DEBUGP(("date parsed\n"));
701 /* Fourth/Third column: Time hh:mm[:ss] */
702 tok = strtok (NULL, " ");
703 hour = min = sec = 0;
/* Each of the two loops below advances P to the next ':' (or to the
   end of the token).  */
706 for (; *p && *p != ':'; ++p);
709 for (; *p && *p != ':'; ++p);
713 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
714 year+1900, month, day, hour, min, sec));
716 /* Build the time-stamp (copy & paste from above) */
717 timestruct.tm_sec = sec;
718 timestruct.tm_min = min;
719 timestruct.tm_hour = hour;
720 timestruct.tm_mday = day;
721 timestruct.tm_mon = month;
722 timestruct.tm_year = year;
723 timestruct.tm_wday = 0;
724 timestruct.tm_yday = 0;
725 timestruct.tm_isdst = -1;
726 cur.tstamp = mktime (×truct); /* store the time-stamp */
728 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
730 /* Skip the fifth column */
732 tok = strtok(NULL, " ");
734 /* Sixth column: Permissions */
736 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
737 tok = strtok(NULL, ")");
740 DEBUGP(("confusing VMS permissions, skipping line\n"));
744 /* Permissions have the format "RWED,RWED,RE" */
745 cur.perms = vmsperms(tok);
746 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
750 /* And put everything into the linked list */
753 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
754 memcpy (l, &cur, sizeof (cur));
755 l->prev = l->next = NULL;
760 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
762 memcpy (l, &cur, sizeof (cur));
774 /* This function switches between the correct parsing routine depending on
775 the SYSTEM_TYPE. The system type should be based on the result of the
776 "SYST" response of the FTP server. According to this response we will
777 use one of the three different listing parsers that cover most of the FTP
778 servers used nowadays. */
/* FILE is the path of the stored listing; it is handed unchanged to
   the selected parser.  The value returned is whatever that parser
   returns.

   Visible dispatch: the Unix parser is called with its second
   argument FALSE on one path and TRUE on others; one path peeks at
   the first character of FILE and uses the WinNT parser when it is a
   digit and the Unix parser (with TRUE) otherwise; one path uses the
   VMS parser; the fallback path logs "Unsupported listing type" and
   tries the Unix parser with FALSE.

   NOTE(review): the case labels that tie each path to a SYSTEM_TYPE
   value are not visible in this copy, and the comment that starts on
   original line 800 is cut off before its closing delimiter --
   restore from a pristine copy.  */
781 ftp_parse_ls (const char *file, const enum stype system_type)
786 return ftp_parse_unix_ls (file, FALSE);
789 /* Detect whether the listing is simulating the UNIX format */
792 fp = fopen (file, "rb");
795 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
800 /* If the first character of the file is '0'-'9', it's WINNT
802 if (c >= '0' && c <='9')
803 return ftp_parse_winnt_ls (file);
805 return ftp_parse_unix_ls (file, TRUE);
808 return ftp_parse_vms_ls (file);
810 return ftp_parse_unix_ls (file, TRUE);
812 logprintf (LOG_NOTQUIET, _("\
813 Unsupported listing type, trying Unix listing parser.\n"));
814 return ftp_parse_unix_ls (file, FALSE);
818 /* Stuff for creating FTP index. */
/* Review summary of ftp_index as visible below.

   FILE is the output path (opened with fopen (file, "wb"); a failure
   is logged), U describes the directory that was listed, and F is a
   fileinfo entry whose fields tstamp, type, name, size and linkto are
   read.  Presumably F is walked as a list; the loop itself is not
   visible in this copy -- confirm.

   The output is an HTML 2.0 page titled "Index of /DIR on HOST:PORT".
   For an entry it prints the date (and time) taken from
   localtime (tstamp) or "time unknown", a label for the entry type,
   and an ftp:// link built from the optional user[:password]@ prefix,
   U->host, U->port, U->dir and the HTML-quoted name.  Plain files
   additionally show their size through legible(); symlinks show their
   target, or "(nil)" when linkto is NULL.

   NOTE(review): when U->user is set, the user name and password are
   URL-escaped and written into every generated href.
   NOTE(review): only the file name goes through html_quote_string();
   U->dir and U->host are written to the page as they are -- confirm
   that they cannot contain HTML metacharacters.
   NOTE(review): U->port is printed with %d in the title and heading
   but with %hu in the href; check which one matches its declared type.
   NOTE(review): the result of localtime() is dereferenced without a
   NULL check on the lines visible here.
   NOTE(review): this copy has gaps, and the comment that starts on
   original line 866 is cut off before its closing delimiter --
   restore from a pristine copy.  */
820 /* The function creates an HTML index containing references to given
821 directories and files on the appropriate host. The references are
824 ftp_index (const char *file, struct url *u, struct fileinfo *f)
828 char *htclfile; /* HTML-clean file name */
832 fp = fopen (file, "wb");
835 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
843 char *tmpu, *tmpp; /* temporary, clean user and passwd */
845 tmpu = url_escape (u->user);
846 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
/* Room for the user, an optional ':' plus password, the '@' and the
   terminating NUL.  */
847 upwd = (char *)xmalloc (strlen (tmpu)
848 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
849 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
855 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
856 fprintf (fp, "<html>\n<head>\n<title>");
857 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
858 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
859 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
860 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
866 /* #### Should we translate the months? Or, even better, use
868 static char *months[] = {
869 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
870 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
872 struct tm *ptm = localtime ((time_t *)&f->tstamp);
874 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
877 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
882 fprintf (fp, _("time unknown "));
886 fprintf (fp, _("File "));
889 fprintf (fp, _("Directory "));
892 fprintf (fp, _("Link "));
895 fprintf (fp, _("Not sure "));
898 htclfile = html_quote_string (f->name);
899 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
902 fprintf (fp, "%s", u->dir);
905 fprintf (fp, "%s", htclfile);
906 if (f->type == FT_DIRECTORY)
908 fprintf (fp, "\">%s", htclfile);
909 if (f->type == FT_DIRECTORY)
911 fprintf (fp, "</a> ");
912 if (f->type == FT_PLAINFILE)
913 fprintf (fp, _(" (%s bytes)"), legible (f->size));
914 else if (f->type == FT_SYMLINK)
915 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
920 fprintf (fp, "</pre>\n</body>\n</html>\n");