1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation,
5 This file is part of Wget.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
33 #include <sys/types.h>
41 /* Converts symbolic permissions to number-style ones, e.g. string
42 rwxr-xr-x to 755. For now, it knows nothing of
43 setuid/setgid/sticky. ACLs are ignored. */
45 symperms (const char *s)
51 for (i = 0; i < 3; i++, s += 3)
54 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
55 (s[2] == 'x' || s[2] == 's'));
61 /* Cleans a line of text so that it can be consistently parsed. Destroys
62 <CR> and <LF> in case that they occur at the end of the line and
63 replaces all <TAB> characters with <SPACE>. Returns the length of the
66 clean_line(char *line)
68 int len = strlen (line);
70 if (line[len - 1] == '\n')
72 if (line[len - 1] == '\r')
74 for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
78 /* Convert the Un*x-ish style directory listing stored in FILE to a
79 linked list of fileinfo (system-independent) entries. The contents
80 of FILE are considered to be produced by the standard Unix `ls -la'
81 output (whatever that might be). BSD (no group) and SYSV (with
82 group) listings are handled.
84 The time stamps are stored in a separate variable, time_t
85 compatible (I hope). The timezones are ignored. */
86 static struct fileinfo *
87 ftp_parse_unix_ls (const char *file, int ignore_perms)
90 static const char *months[] = {
91 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
92 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
94 int next, len, i, error, ignore;
95 int year, month, day; /* for time analysis */
97 struct tm timestruct, *tnow;
100 char *line, *tok; /* tokenizer */
101 struct fileinfo *dir, *l, cur; /* list creation */
103 fp = fopen (file, "rb");
106 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
111 /* Line loop to end of file: */
112 while ((line = read_whole_line (fp)))
114 len = clean_line (line);
115 /* Skip if total... */
116 if (!strncasecmp (line, "total", 5))
121 /* Get the first token (permissions). */
122 tok = strtok (line, " ");
132 /* Decide whether we deal with a file or a directory. */
136 cur.type = FT_PLAINFILE;
137 DEBUGP (("PLAINFILE; "));
140 cur.type = FT_DIRECTORY;
141 DEBUGP (("DIRECTORY; "));
144 cur.type = FT_SYMLINK;
145 DEBUGP (("SYMLINK; "));
148 cur.type = FT_UNKNOWN;
149 DEBUGP (("UNKOWN; "));
166 DEBUGP (("implicite perms %0o; ", cur.perms));
170 cur.perms = symperms (tok + 1);
171 DEBUGP (("perms %0o; ", cur.perms));
174 error = ignore = 0; /* Erroneous and ignoring entries are
175 treated equally for now. */
176 year = hour = min = sec = 0; /* Silence the compiler. */
179 /* While there are tokens on the line, parse them. Next is the
180 number of tokens left until the filename.
182 Use the month-name token as the "anchor" (the place where the
183 position wrt the file name is "known"). When a month name is
184 encountered, `next' is set to 5. Also, the preceding
185 characters are parsed to get the file size.
187 This tactic is quite dubious when it comes to
188 internationalization issues (non-English month names), but it
190 while ((tok = strtok (NULL, " ")))
193 if (next < 0) /* a month name was not encountered */
195 for (i = 0; i < 12; i++)
196 if (!strcmp (tok, months[i]))
198 /* If we got a month, it means the token before it is the
199 size, and the filename is three tokens away. */
205 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
206 cur.size += mul * (*t - '0');
209 /* Something is seriously wrong. */
215 DEBUGP (("month: %s; ", months[month]));
218 else if (next == 4) /* days */
220 if (tok[1]) /* two-digit... */
221 day = 10 * (*tok - '0') + tok[1] - '0';
222 else /* ...or one-digit */
224 DEBUGP (("day: %d; ", day));
228 /* This ought to be either the time, or the year. Let's
231 If we have a number x, it's a year. If we have x:y,
232 it's hours and minutes. If we have x:y:z, z are
235 min = hour = sec = 0;
236 /* We must deal with digits. */
239 /* Suppose it's year. */
240 for (; ISDIGIT (*tok); tok++)
241 year = (*tok - '0') + 10 * year;
244 /* This means these were hours! */
248 /* Get the minutes... */
249 for (; ISDIGIT (*tok); tok++)
250 min = (*tok - '0') + 10 * min;
253 /* ...and the seconds. */
255 for (; ISDIGIT (*tok); tok++)
256 sec = (*tok - '0') + 10 * sec;
261 DEBUGP (("year: %d (no tm); ", year));
263 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
265 else if (next == 2) /* The file name */
270 /* Since the file name may contain a SPC, it is possible
271 for strtok to handle it wrong. */
272 fnlen = strlen (tok);
273 if (fnlen < len - (tok - line))
275 /* So we have a SPC in the file name. Restore the
278 /* If the file is a symbolic link, it should have a
280 if (cur.type == FT_SYMLINK)
282 p = strstr (tok, " -> ");
288 cur.linkto = xstrdup (p + 4);
289 DEBUGP (("link to: %s\n", cur.linkto));
290 /* And separate it from the file name. */
294 /* If we have the filename, add it to the list of files or
296 /* "." and ".." are an exception! */
297 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
299 DEBUGP (("\nIgnoring `.' and `..'; "));
303 /* Some FTP sites choose to have ls -F as their default
304 LIST output, which marks the symlinks with a trailing
305 `@', directory names with a trailing `/' and
306 executables with a trailing `*'. This is no problem
307 unless encountering a symbolic link ending with `@',
308 or an executable ending with `*' on a server without
309 default -F output. I believe these cases are very
311 fnlen = strlen (tok); /* re-calculate `fnlen' */
312 cur.name = (char *)xmalloc (fnlen + 1);
313 memcpy (cur.name, tok, fnlen + 1);
316 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
318 cur.name[fnlen - 1] = '\0';
319 DEBUGP (("trailing `/' on dir.\n"));
321 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
323 cur.name[fnlen - 1] = '\0';
324 DEBUGP (("trailing `@' on link.\n"));
326 else if (cur.type == FT_PLAINFILE
327 && (cur.perms & 0111)
328 && cur.name[fnlen - 1] == '*')
330 cur.name[fnlen - 1] = '\0';
331 DEBUGP (("trailing `*' on exec.\n"));
342 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
349 DEBUGP (("Skipping.\n"));
350 FREE_MAYBE (cur.name);
351 FREE_MAYBE (cur.linkto);
358 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
359 memcpy (l, &cur, sizeof (cur));
360 l->prev = l->next = NULL;
365 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
367 memcpy (l, &cur, sizeof (cur));
370 /* Get the current time. */
371 timenow = time (NULL);
372 tnow = localtime (&timenow);
373 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
374 timestruct.tm_sec = sec;
375 timestruct.tm_min = min;
376 timestruct.tm_hour = hour;
377 timestruct.tm_mday = day;
378 timestruct.tm_mon = month;
381 /* Some listings will not specify the year if it is "obvious"
382 that the file was from the previous year. E.g. if today
383 is 97-01-12, and you see a file of Dec 15th, its year is
384 1996, not 1997. Thanks to Vladimir Volovich for
386 if (month > tnow->tm_mon)
387 timestruct.tm_year = tnow->tm_year - 1;
389 timestruct.tm_year = tnow->tm_year;
392 timestruct.tm_year = year;
393 if (timestruct.tm_year >= 1900)
394 timestruct.tm_year -= 1900;
395 timestruct.tm_wday = 0;
396 timestruct.tm_yday = 0;
397 timestruct.tm_isdst = -1;
398 l->tstamp = mktime (&timestruct); /* store the time-stamp */
407 static struct fileinfo *
408 ftp_parse_winnt_ls (const char *file)
412 int year, month, day; /* for time analysis */
414 struct tm timestruct;
416 char *line, *tok; /* tokenizer */
417 struct fileinfo *dir, *l, cur; /* list creation */
419 fp = fopen (file, "rb");
422 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
427 /* Line loop to end of file: */
428 while ((line = read_whole_line (fp)))
430 len = clean_line (line);
432 /* Extracting name is a bit of black magic and we have to do it
433 before `strtok' inserted extra \0 characters in the line
434 string. For the moment let us just suppose that the name starts at
435 column 39 of the listing. This way we could also recognize
436 filenames that begin with a series of space characters (but who
437 really wants to use such filenames anyway?). */
438 if (len < 40) continue;
440 cur.name = xstrdup(tok);
441 DEBUGP(("Name: '%s'\n", cur.name));
443 /* First column: mm-dd-yy */
444 tok = strtok(line, "-");
446 tok = strtok(NULL, "-");
448 tok = strtok(NULL, " ");
450 /* Assuming the epoch starting at 1.1.1970 */
451 if (year <= 70) year += 100;
453 /* Second column: hh:mm[AP]M, listing does not contain value for
455 tok = strtok(NULL, ":");
457 tok = strtok(NULL, "M");
459 /* Adjust hour from AM/PM. Just for the record, the sequence goes
460 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
462 if (hour == 12) hour = 0;
463 if (*tok == 'P') hour += 12;
465 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
466 year+1900, month, day, hour, min));
468 /* Build the time-stamp (copy & paste from above) */
469 timestruct.tm_sec = 0;
470 timestruct.tm_min = min;
471 timestruct.tm_hour = hour;
472 timestruct.tm_mday = day;
473 timestruct.tm_mon = month;
474 timestruct.tm_year = year;
475 timestruct.tm_wday = 0;
476 timestruct.tm_yday = 0;
477 timestruct.tm_isdst = -1;
478 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
480 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
482 /* Third column: Either file length, or <DIR>. We also set the
483 permissions (guessed as 0644 for plain files and 0755 for
484 directories as the listing does not give us a clue) and filetype
486 tok = strtok(NULL, " ");
487 while (*tok == '\0') tok = strtok(NULL, " ");
490 cur.type = FT_DIRECTORY;
492 cur.perms = 493; /* my gcc does not like 0755 ?? */
493 DEBUGP(("Directory\n"));
497 cur.type = FT_PLAINFILE;
498 cur.size = atoi(tok);
499 cur.perms = 420; /* 0644 octal */
500 DEBUGP(("File, size %ld bytes\n", cur.size));
505 /* And put everything into the linked list */
508 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
509 memcpy (l, &cur, sizeof (cur));
510 l->prev = l->next = NULL;
515 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
517 memcpy (l, &cur, sizeof (cur));
528 /* Converts VMS symbolic permissions to number-style ones, e.g. string
529 RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
530 (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
532 vmsperms (const char *s)
539 case ',': perms <<= 3; break;
540 case 'R': perms |= 4; break;
541 case 'W': perms |= 2; break;
542 case 'D': perms |= 2; break;
543 case 'E': perms |= 1; break;
544 default: DEBUGP(("wrong VMS permissons!\n"));
552 static struct fileinfo *
553 ftp_parse_vms_ls (const char *file)
556 /* #### A third copy of more-or-less the same array ? */
557 static const char *months[] = {
558 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
559 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
562 int year, month, day; /* for time analysis */
564 struct tm timestruct;
566 char *line, *tok, *p; /* tokenizer */
567 struct fileinfo *dir, *l, cur; /* list creation */
569 fp = fopen (file, "rb");
572 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
578 read_whole_line (fp);
579 /* "Directory PUB$DEVICE[PUB]" */
580 read_whole_line (fp);
582 read_whole_line (fp);
584 /* Line loop to end of file: */
585 while ((line = read_whole_line (fp)))
588 i = clean_line (line);
591 /* First column: Name. A bit of black magic again. The name may be
592 either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
593 line. Therefore we will first try to get the complete name
594 until the first space character; if it fails, we assume that the name
595 occupies the whole line. After that we search for the version
596 separator ";", we remove it and check the extension of the file;
597 extension .DIR denotes directory. */
599 tok = strtok(line, " ");
600 if (tok == NULL) tok = line;
601 DEBUGP(("file name: '%s'\n", tok));
602 for (p = tok ; *p && *p != ';' ; p++);
603 if (*p == ';') *p = '\0';
604 p = tok + strlen(tok) - 4;
605 if (!strcmp(p, ".DIR")) *p = '\0';
606 cur.name = xstrdup(tok);
607 DEBUGP(("Name: '%s'\n", cur.name));
609 /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
610 the file size to zero as the listing does tell us only the size in
611 filesystem blocks - for an integrity check (when mirroring, for
612 example) we would need the size in bytes. */
616 cur.type = FT_DIRECTORY;
618 DEBUGP(("Directory\n"));
622 cur.type = FT_PLAINFILE;
628 /* Second column, if exists, or the first column of the next line
629 contain file size in blocks. We will skip it. */
631 tok = strtok(NULL, " ");
634 DEBUGP(("Getting additional line\n"));
636 line = read_whole_line (fp);
639 DEBUGP(("empty line read, leaving listing parser\n"));
642 i = clean_line (line);
645 DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
648 tok = strtok(line, " ");
650 DEBUGP(("second token: '%s'\n", tok));
652 /* Third/Second column: Date DD-MMM-YYYY. */
654 tok = strtok(NULL, "-");
655 DEBUGP(("day: '%s'\n",tok));
657 tok = strtok(NULL, "-");
660 /* If the server produces garbage like
661 'EA95_0PS.GZ;1 No privilege for attempted operation'
662 the first strtok(NULL, "-") will return everything until the end
663 of the line and only the next strtok() call will return NULL. */
664 DEBUGP(("nonsense in VMS listing, skipping this line\n"));
667 for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
668 /* Unknown months are mapped to January */
670 tok = strtok(NULL, " ");
671 year = atoi(tok)-1900;
672 DEBUGP(("date parsed\n"));
674 /* Fourth/Third column: Time hh:mm[:ss] */
675 tok = strtok (NULL, " ");
676 hour = min = sec = 0;
679 for (; *p && *p != ':'; ++p);
682 for (; *p && *p != ':'; ++p);
686 DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
687 year+1900, month, day, hour, min, sec));
689 /* Build the time-stamp (copy & paste from above) */
690 timestruct.tm_sec = sec;
691 timestruct.tm_min = min;
692 timestruct.tm_hour = hour;
693 timestruct.tm_mday = day;
694 timestruct.tm_mon = month;
695 timestruct.tm_year = year;
696 timestruct.tm_wday = 0;
697 timestruct.tm_yday = 0;
698 timestruct.tm_isdst = -1;
699 cur.tstamp = mktime (&timestruct); /* store the time-stamp */
701 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
703 /* Skip the fifth column */
705 tok = strtok(NULL, " ");
707 /* Sixth column: Permissions */
709 tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
710 tok = strtok(NULL, ")");
713 DEBUGP(("confusing VMS permissions, skipping line\n"));
716 /* Permissions have the format "RWED,RWED,RE" */
717 cur.perms = vmsperms(tok);
718 DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
722 /* And put everything into the linked list */
725 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
726 memcpy (l, &cur, sizeof (cur));
727 l->prev = l->next = NULL;
732 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
734 memcpy (l, &cur, sizeof (cur));
746 /* This function switches between the correct parsing routine depending on
747 the SYSTEM_TYPE. The system type should be based on the result of the
748 "SYST" response of the FTP server. According to this response we will
749 use one of the three different listing parsers that cover most of the FTP
750 servers used nowadays. */
753 ftp_parse_ls (const char *file, const enum stype system_type)
758 return ftp_parse_unix_ls (file, FALSE);
761 /* Detect whether the listing is simulating the UNIX format */
764 fp = fopen (file, "rb");
767 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
772 /* If the first character of the file is '0'-'9', it's WINNT
774 if (c >= '0' && c <='9')
775 return ftp_parse_winnt_ls (file);
777 return ftp_parse_unix_ls (file, TRUE);
780 return ftp_parse_vms_ls (file);
782 return ftp_parse_unix_ls (file, TRUE);
784 logprintf (LOG_NOTQUIET, _("\
785 Usupported listing type, trying Unix listing parser.\n"));
786 return ftp_parse_unix_ls (file, FALSE);
790 /* Stuff for creating FTP index. */
792 /* The function creates an HTML index containing references to given
793 directories and files on the appropriate host. The references are
796 ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
800 char *htclfile; /* HTML-clean file name */
804 fp = fopen (file, "wb");
807 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
815 char *tmpu, *tmpp; /* temporary, clean user and passwd */
817 tmpu = CLEANDUP (u->user);
818 tmpp = u->passwd ? CLEANDUP (u->passwd) : NULL;
819 upwd = (char *)xmalloc (strlen (tmpu)
820 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
821 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
827 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
828 fprintf (fp, "<html>\n<head>\n<title>");
829 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
830 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
831 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
832 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
838 /* #### Should we translate the months? */
839 static char *months[] = {
840 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
841 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
843 struct tm *ptm = localtime ((time_t *)&f->tstamp);
845 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
848 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
853 fprintf (fp, _("time unknown "));
857 fprintf (fp, _("File "));
860 fprintf (fp, _("Directory "));
863 fprintf (fp, _("Link "));
866 fprintf (fp, _("Not sure "));
869 htclfile = html_quote_string (f->name);
870 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
873 fprintf (fp, "%s", u->dir);
876 fprintf (fp, "%s", htclfile);
877 if (f->type == FT_DIRECTORY)
879 fprintf (fp, "\">%s", htclfile);
880 if (f->type == FT_DIRECTORY)
882 fprintf (fp, "</a> ");
883 if (f->type == FT_PLAINFILE)
884 fprintf (fp, _(" (%s bytes)"), legible (f->size));
885 else if (f->type == FT_SYMLINK)
886 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
891 fprintf (fp, "</pre>\n</body>\n</html>\n");