1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
32 #include <sys/types.h>
41 /* Undef this if FTPPARSE is not available. In that case, Wget will
42 still work with Unix FTP servers, which covers most cases. */
50 /* Converts symbolic permissions to number-style ones, e.g. string
51 rwxr-xr-x to 755. For now, setuid/setgid/sticky are not decoded; an
52 `s' in an execute slot just counts as execute. ACLs are ignored. */
54 symperms (const char *s)
60 for (i = 0; i < 3; i++, s += 3)
63 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
64 (s[2] == 'x' || s[2] == 's'));
70 /* Convert the Un*x-ish style directory listing stored in FILE to a
71 linked list of fileinfo (system-independent) entries. The contents
72 of FILE are considered to be produced by the standard Unix `ls -la'
73 output (whatever that might be). BSD (no group) and SYSV (with
74 group) listings are handled.
76 The time stamps are stored in a separate variable, time_t
77 compatible (I hope). The timezones are ignored. */
78 static struct fileinfo *
79 ftp_parse_unix_ls (const char *file)
82 static const char *months[] = {
83 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
84 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
86 int next, len, i, error, ignore;
87 int year, month, day; /* for time analysis */
89 struct tm timestruct, *tnow;
92 char *line, *tok; /* tokenizer */
93 struct fileinfo *dir, *l, cur; /* list creation */
95 fp = fopen (file, "rb");
98 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
103 /* Line loop to end of file: */
104 while ((line = read_whole_line (fp)))
106 DEBUGP (("%s\n", line));
108 /* Destroy <CR><LF> if present. */
109 if (len && line[len - 1] == '\n')
111 if (len && line[len - 1] == '\r')
114 /* Skip if total... */
115 if (!strncasecmp (line, "total", 5))
120 /* Get the first token (permissions). */
121 tok = strtok (line, " ");
131 /* Decide whether we deal with a file or a directory. */
135 cur.type = FT_PLAINFILE;
136 DEBUGP (("PLAINFILE; "));
139 cur.type = FT_DIRECTORY;
140 DEBUGP (("DIRECTORY; "));
143 cur.type = FT_SYMLINK;
144 DEBUGP (("SYMLINK; "));
147 cur.type = FT_UNKNOWN;
148 DEBUGP (("UNKOWN; "));
152 cur.perms = symperms (tok + 1);
153 DEBUGP (("perms %0o; ", cur.perms));
155 error = ignore = 0; /* Erroneous and ignored entries are
156 treated equally for now. */
157 year = hour = min = sec = 0; /* Silence the compiler. */
160 /* While there are tokens on the line, parse them. Next is the
161 number of tokens left until the filename.
163 Use the month-name token as the "anchor" (the place where the
164 position wrt the file name is "known"). When a month name is
165 encountered, `next' is set to 5. Also, the preceding
166 characters are parsed to get the file size.
168 This tactic is quite dubious when it comes to
169 internationalization issues (non-English month names), but it
171 while ((tok = strtok (NULL, " ")))
174 if (next < 0) /* a month name was not encountered */
176 for (i = 0; i < 12; i++)
177 if (!strcmp (tok, months[i]))
179 /* If we got a month, it means the token before it is the
180 size, and the filename is three tokens away. */
186 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
187 cur.size += mul * (*t - '0');
190 /* Something is seriously wrong. */
196 DEBUGP (("month: %s; ", months[month]));
199 else if (next == 4) /* days */
201 if (tok[1]) /* two-digit... */
202 day = 10 * (*tok - '0') + tok[1] - '0';
203 else /* ...or one-digit */
205 DEBUGP (("day: %d; ", day));
209 /* This ought to be either the time, or the year. Let's
212 If we have a number x, it's a year. If we have x:y,
213 it's hours and minutes. If we have x:y:z, z are
216 min = hour = sec = 0;
217 /* We must deal with digits. */
220 /* Suppose it's year. */
221 for (; ISDIGIT (*tok); tok++)
222 year = (*tok - '0') + 10 * year;
225 /* This means these were hours! */
229 /* Get the minutes... */
230 for (; ISDIGIT (*tok); tok++)
231 min = (*tok - '0') + 10 * min;
234 /* ...and the seconds. */
236 for (; ISDIGIT (*tok); tok++)
237 sec = (*tok - '0') + 10 * sec;
242 DEBUGP (("year: %d (no tm); ", year));
244 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
246 else if (next == 2) /* The file name */
251 /* Since the file name may contain a SPC, strtok may have
252 cut the name short at the first SPC. */
253 fnlen = strlen (tok);
254 if (fnlen < len - (tok - line))
256 /* So we have a SPC in the file name. Restore the
259 /* If the file is a symbolic link, it should have a
261 if (cur.type == FT_SYMLINK)
263 p = strstr (tok, " -> ");
269 cur.linkto = xstrdup (p + 4);
270 DEBUGP (("link to: %s\n", cur.linkto));
271 /* And separate it from the file name. */
275 /* If we have the filename, add it to the list of files or
277 /* "." and ".." are an exception! */
278 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
280 DEBUGP (("\nIgnoring `.' and `..'; "));
284 /* Some FTP sites choose to have ls -F as their default
285 LIST output, which marks the symlinks with a trailing
286 `@', directory names with a trailing `/' and
287 executables with a trailing `*'. This is no problem
288 unless encountering a symbolic link ending with `@',
289 or an executable ending with `*' on a server without
290 default -F output. I believe these cases are very
292 fnlen = strlen (tok); /* re-calculate `fnlen' */
293 cur.name = (char *)xmalloc (fnlen + 1);
294 memcpy (cur.name, tok, fnlen + 1);
297 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
299 cur.name[fnlen - 1] = '\0';
300 DEBUGP (("trailing `/' on dir.\n"));
302 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
304 cur.name[fnlen - 1] = '\0';
305 DEBUGP (("trailing `@' on link.\n"));
307 else if (cur.type == FT_PLAINFILE
308 && (cur.perms & 0111)
309 && cur.name[fnlen - 1] == '*')
311 cur.name[fnlen - 1] = '\0';
312 DEBUGP (("trailing `*' on exec.\n"));
323 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
330 DEBUGP (("Skipping.\n"));
331 FREE_MAYBE (cur.name);
332 FREE_MAYBE (cur.linkto);
339 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
340 memcpy (l, &cur, sizeof (cur));
341 l->prev = l->next = NULL;
346 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
348 memcpy (l, &cur, sizeof (cur));
351 /* Get the current time. */
352 timenow = time (NULL);
353 tnow = localtime (&timenow);
354 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
355 timestruct.tm_sec = sec;
356 timestruct.tm_min = min;
357 timestruct.tm_hour = hour;
358 timestruct.tm_mday = day;
359 timestruct.tm_mon = month;
362 /* Some listings will not specify the year if it is "obvious"
363 that the file was from the previous year. E.g. if today
364 is 97-01-12, and you see a file of Dec 15th, its year is
365 1996, not 1997. Thanks to Vladimir Volovich for
367 if (month > tnow->tm_mon)
368 timestruct.tm_year = tnow->tm_year - 1;
370 timestruct.tm_year = tnow->tm_year;
373 timestruct.tm_year = year;
374 if (timestruct.tm_year >= 1900)
375 timestruct.tm_year -= 1900;
376 timestruct.tm_wday = 0;
377 timestruct.tm_yday = 0;
378 timestruct.tm_isdst = -1;
379 l->tstamp = mktime (&timestruct); /* store the time-stamp */
390 /* This is a "glue function" that connects the ftpparse interface to
391 the interface Wget expects. ftpparse is used to parse listings
392 from servers other than Unix, like those running VMS or NT. */
394 static struct fileinfo *
395 ftp_parse_nonunix_ls (const char *file)
400 char *line; /* tokenizer */
401 struct fileinfo *dir, *l, cur; /* list creation */
403 fp = fopen (file, "rb");
406 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
411 /* Line loop to end of file: */
412 while ((line = read_whole_line (fp)))
416 DEBUGP (("%s\n", line));
418 /* Destroy <CR><LF> if present. */
419 if (len && line[len - 1] == '\n')
421 if (len && line[len - 1] == '\r')
424 if (ftpparse(&fp, line, len))
427 cur.name = (char *)xmalloc (fp.namelen + 1);
428 memcpy (cur.name, fp.name, fp.namelen);
429 cur.name[fp.namelen] = '\0';
430 DEBUGP (("%s\n", cur.name));
431 /* No links on non-UNIX systems */
433 /* ftpparse won't tell us correct permissions. So let's just invent
437 cur.type = FT_DIRECTORY;
442 cur.type = FT_PLAINFILE;
447 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
448 memcpy (l, &cur, sizeof (cur));
449 l->prev = l->next = NULL;
454 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
456 memcpy (l, &cur, sizeof (cur));
459 l->tstamp = fp.mtime;
470 /* This function selects the appropriate parsing routine
471 depending on the SYSTEM_TYPE. If system type is ST_UNIX, we use
472 our home-grown ftp_parse_unix_ls; otherwise, we use our interface
473 to ftpparse, also known as ftp_parse_nonunix_ls. The system type
474 should be based on the result of the "SYST" response of the FTP
478 ftp_parse_ls (const char *file, const enum stype system_type)
480 if (system_type == ST_UNIX)
482 return ftp_parse_unix_ls (file);
487 return ftp_parse_nonunix_ls (file);
489 /* #### Maybe log some warning here? */
490 return ftp_parse_unix_ls (file);
495 /* Stuff for creating FTP index. */
497 /* The function returns a pointer to a malloc-ed, quoted copy of
498 string s. It will recognize and quote numeric and special graphic
499 entities, as per RFC1866:
506 No other entities are recognized or replaced. */
508 html_quote_string (const char *s)
514 /* Pass through the string, and count the new size. */
515 for (i = 0; *s; s++, i++)
519 else if (*s == '<' || *s == '>')
520 i += 3; /* `lt;' and `gt;' */
522 i += 5; /* `quot;' */
524 res = (char *)xmalloc (i + 1);
526 for (p = res; *s; s++)
539 *p++ = (*s == '<' ? 'l' : 'g');
559 /* The function creates an HTML index containing references to given
560 directories and files on the appropriate host. The references are
563 ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
567 char *htclfile; /* HTML-clean file name */
571 fp = fopen (file, "wb");
574 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
582 char *tmpu, *tmpp; /* temporary, clean user and passwd */
584 tmpu = CLEANDUP (u->user);
585 tmpp = u->passwd ? CLEANDUP (u->passwd) : NULL;
586 upwd = (char *)xmalloc (strlen (tmpu)
587 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
588 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
594 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
595 fprintf (fp, "<html>\n<head>\n<title>");
596 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
597 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
598 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
599 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
605 /* #### Should we translate the months? */
606 static char *months[] = {
607 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
608 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
610 struct tm *ptm = localtime ((time_t *)&f->tstamp);
612 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
615 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
620 fprintf (fp, _("time unknown "));
624 fprintf (fp, _("File "));
627 fprintf (fp, _("Directory "));
630 fprintf (fp, _("Link "));
633 fprintf (fp, _("Not sure "));
636 htclfile = html_quote_string (f->name);
637 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
640 fprintf (fp, "%s", u->dir);
643 fprintf (fp, "%s", htclfile);
644 if (f->type == FT_DIRECTORY)
646 fprintf (fp, "\">%s", htclfile);
647 if (f->type == FT_DIRECTORY)
649 fprintf (fp, "</a> ");
650 if (f->type == FT_PLAINFILE)
651 fprintf (fp, _(" (%s bytes)"), legible (f->size));
652 else if (f->type == FT_SYMLINK)
653 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
658 fprintf (fp, "</pre>\n</body>\n</html>\n");