1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
33 #include <sys/types.h>
41 /* Converts symbolic permissions to number-style ones, e.g. string
42 rwxr-xr-x to 755. For now, it knows nothing of
43 setuid/setgid/sticky. ACLs are ignored. */
45 symperms (const char *s)
/* NOTE(review): this listing jumps from line 45 to line 51, so the
   declarations of PERMS and I, and any check on the length of S, are not
   visible here.  */
/* One pass per rwx triplet; S advances three characters each time, so
   the loop reads s[0] through s[8] in total.  */
51 for (i = 0; i < 3; i++, s += 3)
/* Inside a triplet, r contributes 4 and w contributes 2; either x or s
   in the last slot contributes 1, so a setuid/setgid marker is only
   counted as "executable".  */
54 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
55 (s[2] == 'x' || s[2] == 's'));
61 /* Cleans a line of text so that it can be consistently parsed. Destroys
62 <CR> and <LF> in case that they occur at the end of the line and
63 replaces every <TAB> character with <SPACE>. Returns the length of the modified line. */
66 clean_line(char *line)
/* LINE is modified in place.  NOTE(review): the return type, the braces
   and the return statement are on lines missing from this listing.  */
68 int len = strlen (line);
/* NOTE(review): line 69 is absent from this listing; confirm that it
   rejects LEN == 0, because both tests below read line[len - 1].  The
   statements guarded by those tests (lines 71 and 73) are not visible
   either.  */
70 if (line[len - 1] == '\n')
72 if (line[len - 1] == '\r')
/* Turn every TAB into a single space; the parsers in this file split
   their input on the space character.  */
74 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
78 /* Convert the Un*x-ish style directory listing stored in FILE to a
79 linked list of fileinfo (system-independent) entries. The contents
80 of FILE are considered to be produced by the standard Unix `ls -la'
81 output (whatever that might be). BSD (no group) and SYSV (with
82 group) listings are handled.
84 The time stamps are stored in a separate variable, time_t
85 compatible (I hope). The timezones are ignored. */
/* FILE is the path of the saved listing.  IGNORE_PERMS presumably selects
   the "implicit perms" branch further down instead of symperms(); the
   condition itself sits on lines missing from this listing -- TODO
   confirm.  Parsed entries are chained through the PREV/NEXT members of
   struct fileinfo.  */
86 static struct fileinfo *
87 ftp_parse_unix_ls (const char *file, int ignore_perms)
90 static const char *months[] = {
91 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
92 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
94 int next, len, i, error, ignore;
95 int year, month, day; /* for time analysis */
97 struct tm timestruct, *tnow;
100 char *line, *tok; /* tokenizer */
101 struct fileinfo *dir, *l, cur; /* list creation */
/* The listing is opened in binary mode; a failure to open it is logged
   together with the strerror() text for errno.  */
103 fp = fopen (file, "rb");
106 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
111 /* Line loop to end of file: */
112 while ((line = read_whole_line (fp)))
114 len = clean_line (line);
/* The comparison below is case-insensitive and only looks at the first
   five characters of the line.  */
115 /* Skip if total... */
116 if (!strncasecmp (line, "total", 5))
121 /* Get the first token (permissions). */
122 tok = strtok (line, " ");
132 /* Decide whether we deal with a file or a directory. */
136 cur.type = FT_PLAINFILE;
137 DEBUGP (("PLAINFILE; "));
140 cur.type = FT_DIRECTORY;
141 DEBUGP (("DIRECTORY; "));
144 cur.type = FT_SYMLINK;
145 DEBUGP (("SYMLINK; "));
148 cur.type = FT_UNKNOWN;
149 DEBUGP (("UNKNOWN; "));
164 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
167 DEBUGP (("implicit perms %0o; ", cur.perms));
/* The first character of the token is skipped (presumably the file-type
   character examined above), so symperms() starts at the permission
   characters.  */
171 cur.perms = symperms (tok + 1);
172 DEBUGP (("perms %0o; ", cur.perms));
175 error = ignore = 0; /* Erroneous and ignoring entries are
176 treated equally for now. */
177 year = hour = min = sec = 0; /* Silence the compiler. */
180 /* While there are tokens on the line, parse them. Next is the
181 number of tokens left until the filename.
183 Use the month-name token as the "anchor" (the place where the
184 position wrt the file name is "known"). When a month name is
185 encountered, `next' is set to 5. Also, the preceding
186 characters are parsed to get the file size.
188 This tactic is quite dubious when it comes to
189 internationalization issues (non-English month names), but it
191 while ((tok = strtok (NULL, " ")))
194 if (next < 0) /* a month name was not encountered */
/* The token is compared case-sensitively against each English month
   abbreviation.  */
196 for (i = 0; i < 12; i++)
197 if (!strcmp (tok, months[i]))
199 /* If we got a month, it means the token before it is the
200 size, and the filename is three tokens away. */
/* Walk backwards from T over the digits that precede the month token,
   adding each digit times an increasing power of ten (MUL).  The
   starting values of T and MUL are set on lines not shown here.  */
206 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
207 cur.size += mul * (*t - '0');
210 /* Something is seriously wrong. */
216 DEBUGP (("month: %s; ", months[month]));
/* NOTE(review): the day token is converted as one or two decimal digits;
   no ISDIGIT check is visible around these lines.  */
219 else if (next == 4) /* days */
221 if (tok[1]) /* two-digit... */
222 day = 10 * (*tok - '0') + tok[1] - '0';
223 else /* ...or one-digit */
225 DEBUGP (("day: %d; ", day));
229 /* This ought to be either the time, or the year. Let's
232 If we have a number x, it's a year. If we have x:y,
233 it's hours and minutes. If we have x:y:z, z are
236 min = hour = sec = 0;
237 /* We must deal with digits. */
240 /* Suppose it's year. */
241 for (; ISDIGIT (*tok); tok++)
242 year = (*tok - '0') + 10 * year;
245 /* This means these were hours! */
249 /* Get the minutes... */
250 for (; ISDIGIT (*tok); tok++)
251 min = (*tok - '0') + 10 * min;
254 /* ...and the seconds. */
256 for (; ISDIGIT (*tok); tok++)
257 sec = (*tok - '0') + 10 * sec;
262 DEBUGP (("year: %d (no tm); ", year));
264 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
266 else if (next == 2) /* The file name */
271 /* Since the file name may contain a SPC, it is possible
272 for strtok to handle it wrong. */
273 fnlen = strlen (tok);
/* LEN is the length of the cleaned line and TOK - LINE the offset of the
   token, so the test is true when characters remain after the token.  */
274 if (fnlen < len - (tok - line))
276 /* So we have a SPC in the file name. Restore the
279 /* If the file is a symbolic link, it should have a
281 if (cur.type == FT_SYMLINK)
283 p = strstr (tok, " -> ");
289 cur.linkto = xstrdup (p + 4);
290 DEBUGP (("link to: %s\n", cur.linkto));
291 /* And separate it from the file name. */
295 /* If we have the filename, add it to the list of files or
297 /* "." and ".." are an exception! */
298 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
300 DEBUGP (("\nIgnoring `.' and `..'; "));
304 /* Some FTP sites choose to have ls -F as their default
305 LIST output, which marks the symlinks with a trailing
306 `@', directory names with a trailing `/' and
307 executables with a trailing `*'. This is no problem
308 unless encountering a symbolic link ending with `@',
309 or an executable ending with `*' on a server without
310 default -F output. I believe these cases are very
312 fnlen = strlen (tok); /* re-calculate `fnlen' */
/* The name is copied together with its terminating NUL.  */
313 cur.name = (char *)xmalloc (fnlen + 1);
314 memcpy (cur.name, tok, fnlen + 1);
/* A trailing marker is tested only against the matching entry type; for
   plain files at least one execute bit must also be set.  */
317 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
319 cur.name[fnlen - 1] = '\0';
320 DEBUGP (("trailing `/' on dir.\n"));
322 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
324 cur.name[fnlen - 1] = '\0';
325 DEBUGP (("trailing `@' on link.\n"));
327 else if (cur.type == FT_PLAINFILE
328 && (cur.perms & 0111)
329 && cur.name[fnlen - 1] == '*')
331 cur.name[fnlen - 1] = '\0';
332 DEBUGP (("trailing `*' on exec.\n"));
/* An entry without a name, or a symlink without a target, is reported as
   skipped and its strings are released.  */
343 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
350 DEBUGP (("Skipping.\n"));
351 FREE_MAYBE (cur.name);
352 FREE_MAYBE (cur.linkto);
/* The first accepted entry becomes the head of the list (DIR); later
   entries are allocated behind L.  The test that chooses between the two
   paths is on a line not shown.  */
359 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
360 memcpy (l, &cur, sizeof (cur));
361 l->prev = l->next = NULL;
366 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
368 memcpy (l, &cur, sizeof (cur));
371 /* Get the current time. */
372 timenow = time (NULL);
373 tnow = localtime (&timenow);
/* MONTH is the zero-based index into months[], which is the form tm_mon
   expects.  For the year, the code below uses the current year, or the
   previous one when the parsed month lies later in the year than the
   current month (presumably only when the listing gave no year -- the
   test is on a line not shown); otherwise the parsed year is used, and
   values of 1900 or more are reduced to the years-since-1900 form of
   tm_year.  */
374 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
375 timestruct.tm_sec = sec;
376 timestruct.tm_min = min;
377 timestruct.tm_hour = hour;
378 timestruct.tm_mday = day;
379 timestruct.tm_mon = month;
382 /* Some listings will not specify the year if it is "obvious"
383 that the file was from the previous year. E.g. if today
384 is 97-01-12, and you see a file of Dec 15th, its year is
385 1996, not 1997. Thanks to Vladimir Volovich for
387 if (month > tnow->tm_mon)
388 timestruct.tm_year = tnow->tm_year - 1;
390 timestruct.tm_year = tnow->tm_year;
393 timestruct.tm_year = year;
394 if (timestruct.tm_year >= 1900)
395 timestruct.tm_year -= 1900;
396 timestruct.tm_wday = 0;
397 timestruct.tm_yday = 0;
398 timestruct.tm_isdst = -1;
399 l->tstamp = mktime (&timestruct); /* store the time-stamp */
/* mktime() takes the address of TIMESTRUCT.  With tm_isdst set to -1 the
   library decides whether daylight saving time applies; tm_wday and
   tm_yday are ignored on input.  */
/* Convert a Windows NT (DOS-style) directory listing stored in FILE to a
   linked list of fileinfo entries.  Each line is expected to carry a date
   (mm-dd-yy), a 12-hour time (hh:mm followed by AM or PM), either a size
   or a <DIR> marker, and the file name at a fixed column.  The listing
   carries no permissions, so they are guessed.  Lines shorter than 40
   characters are skipped.  */
408 static struct fileinfo *
409 ftp_parse_winnt_ls (const char *file)
413 int year, month, day; /* for time analysis */
415 struct tm timestruct;
417 char *line, *tok; /* tokenizer */
418 struct fileinfo *dir, *l, cur; /* list creation */
420 fp = fopen (file, "rb");
423 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
428 /* Line loop to end of file: */
429 while ((line = read_whole_line (fp)))
431 len = clean_line (line);
433 /* Extracting name is a bit of black magic and we have to do it
434 before `strtok' inserted extra \0 characters in the line
435 string. For the moment let us just suppose that the name starts at
436 column 39 of the listing. This way we could also recognize
437 filenames that begin with a series of space characters (but who
438 really wants to use such filenames anyway?). */
439 if (len < 40) continue;
/* NOTE(review): the assignment that points TOK at the name (presumably
   column 39, per the comment above) is on a line missing from this
   listing.  */
441 cur.name = xstrdup(tok);
442 DEBUGP(("Name: '%s'\n", cur.name));
/* MONTH is stored zero-based, as tm_mon expects.  NOTE(review): the
   strtok() results in the date and time parsing below are passed to
   atoi() or dereferenced without a visible NULL check, so a malformed
   line could crash here -- worth guarding.  */
444 /* First column: mm-dd-yy. Should atoi() on the month fail, january
446 tok = strtok(line, "-");
447 month = atoi(tok) - 1;
448 if (month < 0) month = 0;
449 tok = strtok(NULL, "-");
451 tok = strtok(NULL, " ");
453 /* Assuming the epoch starting at 1.1.1970 */
/* tm_year counts years from 1900, so two-digit years up to and including
   70 are moved into the 2000s.  */
454 if (year <= 70) year += 100;
456 /* Second column: hh:mm[AP]M, listing does not contain value for
458 tok = strtok(NULL, ":");
460 tok = strtok(NULL, "M");
462 /* Adjust hour from AM/PM. Just for the record, the sequence goes
463 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
/* Folding 12 to 0 first makes 12:xxAM hour 0 and, after the PM offset,
   12:xxPM hour 12.  */
465 if (hour == 12) hour = 0;
466 if (*tok == 'P') hour += 12;
/* MONTH is still zero-based here, so the debug line prints January
   as 00.  */
468 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
469 year+1900, month, day, hour, min));
471 /* Build the time-stamp (copy & paste from above) */
472 timestruct.tm_sec = 0;
473 timestruct.tm_min = min;
474 timestruct.tm_hour = hour;
475 timestruct.tm_mday = day;
476 timestruct.tm_mon = month;
477 timestruct.tm_year = year;
478 timestruct.tm_wday = 0;
479 timestruct.tm_yday = 0;
480 timestruct.tm_isdst = -1;
481 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
/* mktime() takes the address of TIMESTRUCT; tm_isdst of -1 leaves the
   daylight-saving decision to the library.  */
483 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
/* NOTE(review): strtok() never returns an empty token, so the while loop
   below only makes a difference when TOK is NULL, and in that case it
   dereferences a null pointer.  The size is converted with atoi() and is
   therefore limited to the range of int.  */
485 /* Third column: Either file length, or <DIR>. We also set the
486 permissions (guessed as 0644 for plain files and 0755 for
487 directories as the listing does not give us a clue) and filetype
489 tok = strtok(NULL, " ");
490 while (*tok == '\0') tok = strtok(NULL, " ");
493 cur.type = FT_DIRECTORY;
496 DEBUGP(("Directory\n"));
500 cur.type = FT_PLAINFILE;
501 cur.size = atoi(tok);
503 DEBUGP(("File, size %ld bytes\n", cur.size));
508 /* And put everything into the linked list */
/* The first entry becomes the head of the list (DIR); later entries are
   allocated behind L.  The test that chooses between the two paths is on
   a line not shown.  */
511 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
512 memcpy (l, &cur, sizeof (cur));
513 l->prev = l->next = NULL;
518 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
520 memcpy (l, &cur, sizeof (cur));
531 /* Converts VMS symbolic permissions to number-style ones, e.g. string
532 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
533 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
535 vmsperms (const char *s)
/* NOTE(review): the loop over S and the enclosing switch are on lines
   missing from this listing; only the case labels are visible.  */
/* A comma ends one permission class: the bits collected so far move up
   by one octal digit.  */
542 case ',': perms <<= 3; break;
/* Read, write and execute map onto the 4, 2 and 1 bits; delete is folded
   into the write bit.  */
543 case 'R': perms |= 4; break;
544 case 'W': perms |= 2; break;
545 case 'D': perms |= 2; break;
546 case 'E': perms |= 1; break;
/* Any other character is reported through DEBUGP; what follows the
   report is not visible here.  */
547 default: DEBUGP(("wrong VMS permissons!\n"));
/* Convert a VMS directory listing stored in FILE to a linked list of
   fileinfo entries.  The first three lines of the listing are read as a
   header and not parsed; every following line (or pair of lines, when the
   remaining columns continue on the next line) yields a name, a time
   stamp and permissions.  */
555 static struct fileinfo *
556 ftp_parse_vms_ls (const char *file)
559 /* #### A third copy of more-or-less the same array ? */
560 static const char *months[] = {
561 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
562 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
565 int year, month, day; /* for time analysis */
567 struct tm timestruct;
569 char *line, *tok; /* tokenizer */
570 struct fileinfo *dir, *l, cur; /* list creation */
572 fp = fopen (file, "rb");
575 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
580 /* Skip empty line. */
581 line = read_whole_line (fp);
585 /* Skip "Directory PUB$DEVICE[PUB]" */
586 line = read_whole_line (fp);
590 /* Skip empty line. */
591 line = read_whole_line (fp);
595 /* Line loop to end of file: */
596 while ((line = read_whole_line (fp)))
599 i = clean_line (line);
606 /* First column: Name. A bit of black magic again. The name may be
607 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
608 line. Therefore we will first try to get the complete name
609 until the first space character; if it fails, we assume that the name
610 occupies the whole line. After that we search for the version
611 separator ";", we remove it and check the extension of the file;
612 extension .DIR denotes directory. */
614 tok = strtok(line, " ");
615 if (tok == NULL) tok = line;
616 DEBUGP(("file name: '%s'\n", tok));
617 for (p = tok ; *p && *p != ';' ; p++);
618 if (*p == ';') *p = '\0';
/* NOTE(review): for a name shorter than four characters P ends up before
   TOK, so the strcmp() below reads outside the string -- confirm and
   guard.  */
619 p = tok + strlen(tok) - 4;
620 if (!strcmp(p, ".DIR")) *p = '\0';
621 cur.name = xstrdup(tok);
622 DEBUGP(("Name: '%s'\n", cur.name));
624 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
625 the file size to zero as the listing does tell us only the size in
626 filesystem blocks - for an integrity check (when mirroring, for
627 example) we would need the size in bytes. */
631 cur.type = FT_DIRECTORY;
633 DEBUGP(("Directory\n"));
637 cur.type = FT_PLAINFILE;
643 /* Second column, if exists, or the first column of the next line
644 contain file size in blocks. We will skip it. */
646 tok = strtok(NULL, " ");
649 DEBUGP(("Getting additional line\n"));
651 line = read_whole_line (fp);
654 DEBUGP(("empty line read, leaving listing parser\n"));
657 i = clean_line (line);
660 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
664 tok = strtok(line, " ");
666 DEBUGP(("second token: '%s'\n", tok));
668 /* Third/Second column: Date DD-MMM-YYYY. */
670 tok = strtok(NULL, "-");
671 DEBUGP(("day: '%s'\n",tok));
673 tok = strtok(NULL, "-");
676 /* If the server produces garbage like
677 'EA95_0PS.GZ;1 No privilege for attempted operation'
678 the first strtok(NULL, "-") will return everything until the end
679 of the line and only the next strtok() call will return NULL. */
680 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
/* The month token is compared case-sensitively against the upper-case
   abbreviations; after the loop I is the zero-based month index, or 12
   when nothing matched.  */
684 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
685 /* Unknown months are mapped to January */
/* tm_year counts years from 1900.  NOTE(review): TOK is passed to atoi()
   without a visible NULL check.  */
687 tok = strtok (NULL, " ");
688 year = atoi (tok) - 1900;
689 DEBUGP(("date parsed\n"));
691 /* Fourth/Third column: Time hh:mm[:ss] */
692 tok = strtok (NULL, " ");
693 hour = min = sec = 0;
/* Each loop advances P to the next ':' or to the end of the token; the
   conversions of the individual fields are on lines not shown.  */
696 for (; *p && *p != ':'; ++p);
699 for (; *p && *p != ':'; ++p);
703 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
704 year+1900, month, day, hour, min, sec));
706 /* Build the time-stamp (copy & paste from above) */
707 timestruct.tm_sec = sec;
708 timestruct.tm_min = min;
709 timestruct.tm_hour = hour;
710 timestruct.tm_mday = day;
711 timestruct.tm_mon = month;
712 timestruct.tm_year = year;
713 timestruct.tm_wday = 0;
714 timestruct.tm_yday = 0;
715 timestruct.tm_isdst = -1;
716 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
/* mktime() takes the address of TIMESTRUCT; tm_isdst of -1 leaves the
   daylight-saving decision to the library.  */
718 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
720 /* Skip the fifth column */
722 tok = strtok(NULL, " ");
724 /* Sixth column: Permissions */
/* Everything up to the first comma is discarded; the remainder up to the
   closing parenthesis is what gets handed to vmsperms().  */
726 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
727 tok = strtok(NULL, ")");
730 DEBUGP(("confusing VMS permissions, skipping line\n"));
734 /* Permissions have the format "RWED,RWED,RE" */
735 cur.perms = vmsperms(tok);
736 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
740 /* And put everything into the linked list */
/* The first entry becomes the head of the list (DIR); later entries are
   allocated behind L.  The test that chooses between the two paths is on
   a line not shown.  */
743 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
744 memcpy (l, &cur, sizeof (cur));
745 l->prev = l->next = NULL;
750 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
752 memcpy (l, &cur, sizeof (cur));
764 /* This function switches between the correct parsing routine depending on
765 the SYSTEM_TYPE. The system type should be based on the result of the
766 "SYST" response of the FTP server. According to this response we will
767 use one of the three different listing parsers that cover most of the FTP
768 servers used nowadays. */
771 ftp_parse_ls (const char *file, const enum stype system_type)
/* FILE is the path of the saved listing; every return below hands it
   unchanged to one of the three parsers.  NOTE(review): the switch on
   SYSTEM_TYPE and its case labels are on lines missing from this listing,
   so which system type reaches which return cannot be read off here.  */
/* Returns visible below, in order: the Unix parser with ignore_perms
   FALSE; after peeking at the first character of the listing, the WINNT
   parser for a digit and otherwise the Unix parser with ignore_perms
   TRUE; the VMS parser; the Unix parser with ignore_perms TRUE; and, as
   the fallback after logging "Unsupported listing type", the Unix parser
   with ignore_perms FALSE.  */
776 return ftp_parse_unix_ls (file, FALSE);
779 /* Detect whether the listing is simulating the UNIX format */
/* The listing is opened here only to inspect its first character C.
   NOTE(review): the read of C and the matching fclose() are not visible
   in this listing -- confirm that FP is closed before the returns.  */
782 fp = fopen (file, "rb");
785 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
790 /* If the first character of the file is '0'-'9', it's WINNT
792 if (c >= '0' && c <='9')
793 return ftp_parse_winnt_ls (file);
795 return ftp_parse_unix_ls (file, TRUE);
798 return ftp_parse_vms_ls (file);
800 return ftp_parse_unix_ls (file, TRUE);
802 logprintf (LOG_NOTQUIET, _("\
803 Unsupported listing type, trying Unix listing parser.\n"));
804 return ftp_parse_unix_ls (file, FALSE);
808 /* Stuff for creating FTP index. */
810 /* The function creates an HTML index containing references to given
811 directories and files on the appropriate host. The references are ftp:// URLs. */
814 ftp_index (const char *file, struct url *u, struct fileinfo *f)
818 char *htclfile; /* HTML-clean file name */
/* FILE is the path of the index to write; it is opened in binary write
   mode and a failure is logged with the strerror() text.  NOTE(review):
   the return type, the loop over the entries reachable from F and the
   cleanup code are on lines missing from this listing.  */
822 fp = fopen (file, "wb");
825 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
833 char *tmpu, *tmpp; /* temporary, clean user and passwd */
/* Build the "user[:passwd]@" prefix used in every link.  The buffer holds
   the encode_string() result for the user, an optional colon plus the
   encode_string() result for the password, and two more bytes for the
   at-sign and the terminating NUL.  */
835 tmpu = encode_string (u->user);
836 tmpp = u->passwd ? encode_string (u->passwd) : NULL;
837 upwd = (char *)xmalloc (strlen (tmpu)
838 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
839 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
/* HTML prologue: the same "Index of" text goes into the title and the
   heading.  NOTE(review): u->dir and u->host are written without HTML
   quoting, unlike the file names further down -- confirm that this is
   safe for the values callers pass.  */
845 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
846 fprintf (fp, "<html>\n<head>\n<title>");
847 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
848 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
849 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
850 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
/* Per entry: the time stamp is broken down with localtime() and printed
   as year, month name and day followed by hour and minute, or as "time
   unknown"; the test that selects between the two is not visible.  */
856 /* #### Should we translate the months? */
857 static char *months[] = {
858 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
859 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
861 struct tm *ptm = localtime ((time_t *)&f->tstamp);
863 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
866 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
871 fprintf (fp, _("time unknown "));
/* One of four translated type labels is written before the link; the
   switch that selects it is on lines not shown.  */
875 fprintf (fp, _("File "));
878 fprintf (fp, _("Directory "));
881 fprintf (fp, _("Link "));
884 fprintf (fp, _("Not sure "));
/* The quoted name is used both inside the href and as the link text.
   NOTE(review): u->port is printed with %hu here but with %d in the
   title above; confirm which one matches the type of the member.  */
887 htclfile = html_quote_string (f->name);
888 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
891 fprintf (fp, "%s", u->dir);
894 fprintf (fp, "%s", htclfile);
895 if (f->type == FT_DIRECTORY)
897 fprintf (fp, "\">%s", htclfile);
898 if (f->type == FT_DIRECTORY)
900 fprintf (fp, "</a> ");
/* Plain files get their size formatted by legible(); symlinks get the
   link target, or "(nil)" when none is recorded.  */
901 if (f->type == FT_PLAINFILE)
902 fprintf (fp, _(" (%s bytes)"), legible (f->size));
903 else if (f->type == FT_SYMLINK)
904 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
909 fprintf (fp, "</pre>\n</body>\n</html>\n");