1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
32 #include <sys/types.h>
41 /* Undef this if FTPPARSE is not available. In that case, Wget will
42 still work with Unix FTP servers, which covers most cases. */
50 /* Converts symbolic permissions to number-style ones, e.g. string
51 rwxr-xr-x to 755. For now, it knows nothing of
52 setuid/setgid/sticky. ACLs are ignored. */
54 symperms (const char *s)
60 for (i = 0; i < 3; i++, s += 3)
63 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
64 (s[2] == 'x' || s[2] == 's'));
70 /* Convert the Un*x-ish style directory listing stored in FILE to a
71 linked list of fileinfo (system-independent) entries. The contents
72 of FILE are considered to be produced by the standard Unix `ls -la'
73 output (whatever that might be). BSD (no group) and SYSV (with
74 group) listings are handled.
76 The time stamps are stored in a separate variable, time_t
77 compatible (I hope). The timezones are ignored. */
78 static struct fileinfo *
79 ftp_parse_unix_ls (const char *file, int ignore_perms)
82 static const char *months[] = {
83 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
84 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
86 int next, len, i, error, ignore;
87 int year, month, day; /* for time analysis */
89 struct tm timestruct, *tnow;
92 char *line, *tok; /* tokenizer */
93 struct fileinfo *dir, *l, cur; /* list creation */
95 fp = fopen (file, "rb");
98 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
103 /* Line loop to end of file: */
104 while ((line = read_whole_line (fp)))
106 DEBUGP (("%s\n", line));
108 /* Destroy <CR><LF> if present. */
109 if (len && line[len - 1] == '\n')
111 if (len && line[len - 1] == '\r')
114 /* Skip if total... */
115 if (!strncasecmp (line, "total", 5))
120 /* Get the first token (permissions). */
121 tok = strtok (line, " ");
131 /* Decide whether we deal with a file or a directory. */
135 cur.type = FT_PLAINFILE;
136 DEBUGP (("PLAINFILE; "));
139 cur.type = FT_DIRECTORY;
140 DEBUGP (("DIRECTORY; "));
143 cur.type = FT_SYMLINK;
144 DEBUGP (("SYMLINK; "));
147 cur.type = FT_UNKNOWN;
148 DEBUGP (("UNKOWN; "));
165 DEBUGP (("implicite perms %0o; ", cur.perms));
169 cur.perms = symperms (tok + 1);
170 DEBUGP (("perms %0o; ", cur.perms));
173 error = ignore = 0; /* Erroneous and ignoring entries are
174 treated equally for now. */
175 year = hour = min = sec = 0; /* Silence the compiler. */
178 /* While there are tokens on the line, parse them. Next is the
179 number of tokens left until the filename.
181 Use the month-name token as the "anchor" (the place where the
182 position wrt the file name is "known"). When a month name is
183 encountered, `next' is set to 5. Also, the preceding
184 characters are parsed to get the file size.
186 This tactic is quite dubious when it comes to
187 internationalization issues (non-English month names), but it
189 while ((tok = strtok (NULL, " ")))
192 if (next < 0) /* a month name was not encountered */
194 for (i = 0; i < 12; i++)
195 if (!strcmp (tok, months[i]))
197 /* If we got a month, it means the token before it is the
198 size, and the filename is three tokens away. */
204 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
205 cur.size += mul * (*t - '0');
208 /* Something is seriously wrong. */
214 DEBUGP (("month: %s; ", months[month]));
217 else if (next == 4) /* days */
219 if (tok[1]) /* two-digit... */
220 day = 10 * (*tok - '0') + tok[1] - '0';
221 else /* ...or one-digit */
223 DEBUGP (("day: %d; ", day));
227 /* This ought to be either the time, or the year. Let's
230 If we have a number x, it's a year. If we have x:y,
231 it's hours and minutes. If we have x:y:z, z are
234 min = hour = sec = 0;
235 /* We must deal with digits. */
238 /* Suppose it's year. */
239 for (; ISDIGIT (*tok); tok++)
240 year = (*tok - '0') + 10 * year;
243 /* This means these were hours! */
247 /* Get the minutes... */
248 for (; ISDIGIT (*tok); tok++)
249 min = (*tok - '0') + 10 * min;
252 /* ...and the seconds. */
254 for (; ISDIGIT (*tok); tok++)
255 sec = (*tok - '0') + 10 * sec;
260 DEBUGP (("year: %d (no tm); ", year));
262 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
264 else if (next == 2) /* The file name */
269 /* Since the file name may contain a SPC, it is possible
270 for strtok to handle it wrong. */
271 fnlen = strlen (tok);
272 if (fnlen < len - (tok - line))
274 /* So we have a SPC in the file name. Restore the
277 /* If the file is a symbolic link, it should have a
279 if (cur.type == FT_SYMLINK)
281 p = strstr (tok, " -> ");
287 cur.linkto = xstrdup (p + 4);
288 DEBUGP (("link to: %s\n", cur.linkto));
289 /* And separate it from the file name. */
293 /* If we have the filename, add it to the list of files or
295 /* "." and ".." are an exception! */
296 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
298 DEBUGP (("\nIgnoring `.' and `..'; "));
302 /* Some FTP sites choose to have ls -F as their default
303 LIST output, which marks the symlinks with a trailing
304 `@', directory names with a trailing `/' and
305 executables with a trailing `*'. This is no problem
306 unless encountering a symbolic link ending with `@',
307 or an executable ending with `*' on a server without
308 default -F output. I believe these cases are very
310 fnlen = strlen (tok); /* re-calculate `fnlen' */
311 cur.name = (char *)xmalloc (fnlen + 1);
312 memcpy (cur.name, tok, fnlen + 1);
315 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
317 cur.name[fnlen - 1] = '\0';
318 DEBUGP (("trailing `/' on dir.\n"));
320 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
322 cur.name[fnlen - 1] = '\0';
323 DEBUGP (("trailing `@' on link.\n"));
325 else if (cur.type == FT_PLAINFILE
326 && (cur.perms & 0111)
327 && cur.name[fnlen - 1] == '*')
329 cur.name[fnlen - 1] = '\0';
330 DEBUGP (("trailing `*' on exec.\n"));
341 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
348 DEBUGP (("Skipping.\n"));
349 FREE_MAYBE (cur.name);
350 FREE_MAYBE (cur.linkto);
357 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
358 memcpy (l, &cur, sizeof (cur));
359 l->prev = l->next = NULL;
364 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
366 memcpy (l, &cur, sizeof (cur));
369 /* Get the current time. */
370 timenow = time (NULL);
371 tnow = localtime (&timenow);
372 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
373 timestruct.tm_sec = sec;
374 timestruct.tm_min = min;
375 timestruct.tm_hour = hour;
376 timestruct.tm_mday = day;
377 timestruct.tm_mon = month;
380 /* Some listings will not specify the year if it is "obvious"
381 that the file was from the previous year. E.g. if today
382 is 97-01-12, and you see a file of Dec 15th, its year is
383 1996, not 1997. Thanks to Vladimir Volovich for
385 if (month > tnow->tm_mon)
386 timestruct.tm_year = tnow->tm_year - 1;
388 timestruct.tm_year = tnow->tm_year;
391 timestruct.tm_year = year;
392 if (timestruct.tm_year >= 1900)
393 timestruct.tm_year -= 1900;
394 timestruct.tm_wday = 0;
395 timestruct.tm_yday = 0;
396 timestruct.tm_isdst = -1;
397 l->tstamp = mktime (&timestruct); /* store the time-stamp built from the tm fields set above */
406 static struct fileinfo *
407 ftp_parse_winnt_ls (const char *file)
411 int year, month, day; /* for time analysis */
413 struct tm timestruct;
415 char *line, *tok; /* tokenizer */
416 struct fileinfo *dir, *l, cur; /* list creation */
418 fp = fopen (file, "rb");
421 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
426 /* Line loop to end of file: */
427 while ((line = read_whole_line (fp)))
429 DEBUGP (("%s\n", line));
431 /* Destroy <CR><LF> if present. */
432 if (len && line[len - 1] == '\n')
434 if (len && line[len - 1] == '\r')
437 /* Extracting name is a bit of black magic and we have to do it
438 before `strtok' inserted extra \0 characters in the line
439 string. For the moment let us just suppose that the name starts at
440 column 39 of the listing. This way we could also recognize
441 filenames that begin with a series of space characters (but who
442 really wants to use such filenames anyway?). */
443 if (len < 40) continue;
445 cur.name = xstrdup(tok);
446 DEBUGP(("Name: '%s'\n", cur.name));
448 /* First column: mm-dd-yy */
449 tok = strtok(line, "-");
451 tok = strtok(NULL, "-");
453 tok = strtok(NULL, " ");
455 /* Assuming the epoch starting at 1.1.1970 */
456 if (year <= 70) year += 100;
458 /* Second column: hh:mm[AP]M */
459 tok = strtok(NULL, ":");
461 tok = strtok(NULL, "M");
463 /* Adjust hour from AM/PM.  On a 12-hour clock "12" stands for hour
zero of its half-day (12:xxAM is 00:xx, 12:xxPM is 12:xx), so fold 12
down to 0 before adding the PM offset; otherwise 12:xxPM would come
out as hour 24 and 12:xxAM as hour 12. */
if (hour == 12) hour = 0;
465 if (*tok == 'P') hour += 12;
466 /* Listing does not contain value for seconds */
469 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
470 year+1900, month, day, hour, min));
472 /* Build the time-stamp (copy & paste from above) */
473 timestruct.tm_sec = sec;
474 timestruct.tm_min = min;
475 timestruct.tm_hour = hour;
476 timestruct.tm_mday = day;
477 timestruct.tm_mon = month;
478 timestruct.tm_year = year;
479 timestruct.tm_wday = 0;
480 timestruct.tm_yday = 0;
481 timestruct.tm_isdst = -1;
482 cur.tstamp = mktime (&timestruct); /* store the time-stamp built from the tm fields set above */
484 DEBUGP(("Timestamp: %ld\n", cur.tstamp));
486 /* Third column: Either file length, or <DIR>. We also set the
487 permissions (guessed as 0644 for plain files and 0755 for
488 directories as the listing does not give us a clue) and filetype
490 tok = strtok(NULL, " ");
491 while (*tok == '\0') tok = strtok(NULL, " ");
494 cur.type = FT_DIRECTORY;
496 cur.perms = 493; /* my gcc does not like 0755 ?? */
497 DEBUGP(("Directory\n"));
501 cur.type = FT_PLAINFILE;
502 cur.size = atoi(tok);
503 cur.perms = 420; /* 0644 octal */
504 DEBUGP(("File, size %ld bytes\n", cur.size));
509 /* And put everything into the linked list */
512 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
513 memcpy (l, &cur, sizeof (cur));
514 l->prev = l->next = NULL;
519 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
521 memcpy (l, &cur, sizeof (cur));
535 /* This is a "glue function" that connects the ftpparse interface to
536 the interface Wget expects. ftpparse is used to parse listings
537 from servers other than Unix, like those running VMS or NT. */
539 static struct fileinfo *
540 ftp_parse_nonunix_ls (const char *file)
545 char *line; /* tokenizer */
546 struct fileinfo *dir, *l, cur; /* list creation */
548 fp = fopen (file, "rb");
551 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
556 /* Line loop to end of file: */
557 while ((line = read_whole_line (fp)))
561 DEBUGP (("%s\n", line));
563 /* Destroy <CR><LF> if present. */
564 if (len && line[len - 1] == '\n')
566 if (len && line[len - 1] == '\r')
569 if (ftpparse(&fp, line, len))
572 cur.name = (char *)xmalloc (fp.namelen + 1);
573 memcpy (cur.name, fp.name, fp.namelen);
574 cur.name[fp.namelen] = '\0';
575 DEBUGP (("%s\n", cur.name));
576 /* No links on non-UNIX systems */
578 /* ftpparse won't tell us correct permissions. So let's just invent
582 cur.type = FT_DIRECTORY;
587 cur.type = FT_PLAINFILE;
592 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
593 memcpy (l, &cur, sizeof (cur));
594 l->prev = l->next = NULL;
599 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
601 memcpy (l, &cur, sizeof (cur));
604 l->tstamp = fp.mtime;
615 /* This function switches between the correct parsing routine
616 depending on the SYSTEM_TYPE. If system type is ST_UNIX, we use
617 our home-grown ftp_parse_unix_ls; otherwise, we use our interface
618 to ftpparse, also known as ftp_parse_nonunix_ls. The system type
619 should be based on the result of the "SYST" response of the FTP
623 ftp_parse_ls (const char *file, const enum stype system_type)
628 return ftp_parse_unix_ls (file, FALSE);
631 /* Detect whether the listing is simulating the UNIX format */
634 fp = fopen (file, "rb");
637 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
642 /* If the first character of the file is '0'-'9', it's WINNT
644 if (c >= '0' && c <='9')
645 return ftp_parse_winnt_ls (file);
647 return ftp_parse_unix_ls (file, TRUE);
651 return ftp_parse_nonunix_ls (file);
653 /* #### Maybe log some warning here? */
654 return ftp_parse_unix_ls (file, FALSE); /* the parser takes (file, ignore_perms); pass FALSE, the same value the first return in this function uses */
659 /* Stuff for creating FTP index. */
661 /* The function creates an HTML index containing references to given
662 directories and files on the appropriate host. The references are
665 ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
669 char *htclfile; /* HTML-clean file name */
673 fp = fopen (file, "wb");
676 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
684 char *tmpu, *tmpp; /* temporary, clean user and passwd */
686 tmpu = CLEANDUP (u->user);
687 tmpp = u->passwd ? CLEANDUP (u->passwd) : NULL;
688 upwd = (char *)xmalloc (strlen (tmpu)
689 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
690 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
696 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
697 fprintf (fp, "<html>\n<head>\n<title>");
698 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
699 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
700 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
701 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
707 /* #### Should we translate the months? */
708 static char *months[] = {
709 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
710 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
712 struct tm *ptm = localtime ((time_t *)&f->tstamp);
714 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
717 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
722 fprintf (fp, _("time unknown "));
726 fprintf (fp, _("File "));
729 fprintf (fp, _("Directory "));
732 fprintf (fp, _("Link "));
735 fprintf (fp, _("Not sure "));
738 htclfile = html_quote_string (f->name);
739 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
742 fprintf (fp, "%s", u->dir);
745 fprintf (fp, "%s", htclfile);
746 if (f->type == FT_DIRECTORY)
748 fprintf (fp, "\">%s", htclfile);
749 if (f->type == FT_DIRECTORY)
751 fprintf (fp, "</a> ");
752 if (f->type == FT_PLAINFILE)
753 fprintf (fp, _(" (%s bytes)"), legible (f->size));
754 else if (f->type == FT_SYMLINK)
755 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
760 fprintf (fp, "</pre>\n</body>\n</html>\n");