1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
32 #include <sys/types.h>
/* Convert a symbolic permission string, as printed by `ls -l' without
   the leading file-type character, to its numeric mode; e.g. the
   string "rwxr-xr-x" yields 0755.

   S must point at the nine permission characters (the user, group and
   other triplets, in that order).  If S ends before nine characters
   have been seen, 0 is returned instead of reading past the
   terminator.  An `s' in an execute slot is counted as execute; the
   setuid/setgid/sticky bits themselves are not reported, and ACL
   markers are ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  /* Refuse short strings up front, so that the loop below never
     indexes past the terminating NUL.  */
  for (i = 0; i < 9; i++)
    if (s[i] == '\0')
      return 0;

  for (i = 0; i < 3; i++, s += 3)
    {
      /* Each rwx triplet is one octal digit; make room for it.  */
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's'));
    }
  return perms;
}
61 /* Convert the Un*x-ish style directory listing stored in FILE to a
62 linked list of fileinfo (system-independent) entries. The contents
63 of FILE are considered to be produced by the standard Unix `ls -la'
64 output (whatever that might be). BSD (no group) and SYSV (with
65 group) listings are handled.
67 The time stamps are stored in a separate variable, time_t
68 compatible (I hope). The timezones are ignored.

   FILE is the name of the listing.  It is opened with fopen() in
   "rb" mode; when that fails the reason is reported through
   logprintf().  What is returned in that case is not visible in this
   excerpt.

   Each line obtained from read_whole_line() is split on spaces with
   strtok().  Lines starting with "total" are skipped.  The first
   token supplies the permissions (converted by symperms()), the
   month name is used as the anchor for locating size, day,
   time-or-year and file name, and `.' / `..' are ignored.  Entries
   that end up without a name, and symlinks without a target, are
   dropped.  Every accepted entry is copied into a node obtained from
   xmalloc() and given a time stamp computed with mktime().

   NOTE(review): this excerpt has gaps (the embedded original line
   numbers are not contiguous), so several comments in the body are
   cut off mid-sentence and some statements are missing.  */
69 static struct fileinfo *
70 ftp_parse_unix_ls (const char *file)
73 static const char *months[] = {
74 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
75 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
77 int next, len, i, error, ignore;
78 int year, month, day; /* for time analysis */
80 struct tm timestruct, *tnow;
83 char *line, *tok; /* tokenizer */
84 struct fileinfo *dir, *l, cur; /* list creation */
/* Open the listing; a failure is logged together with strerror().  */
86 fp = fopen (file, "rb");
89 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
94 /* Line loop to end of file: */
95 while ((line = read_whole_line (fp)))
97 DEBUGP (("%s\n", line));
99 /* Destroy <CR><LF> if present. */
100 if (len && line[len - 1] == '\n')
102 if (len && line[len - 1] == '\r')
105 /* Skip if total... */
106 if (!strncasecmp (line, "total", 5))
111 /* Get the first token (permissions). */
112 tok = strtok (line, " ");
122 /* Decide whether we deal with a file or a directory. */
126 cur.type = FT_PLAINFILE;
127 DEBUGP (("PLAINFILE; "));
130 cur.type = FT_DIRECTORY;
131 DEBUGP (("DIRECTORY; "));
134 cur.type = FT_SYMLINK;
135 DEBUGP (("SYMLINK; "));
138 cur.type = FT_UNKNOWN;
139 DEBUGP (("UNKOWN; "));
/* The permission letters are taken from tok + 1, i.e. the first
   character of the token is skipped (presumably the file-type
   character examined just above -- the test itself is not visible
   in this excerpt).  */
143 cur.perms = symperms (tok + 1);
144 DEBUGP (("perms %0o; ", cur.perms));
146 error = ignore = 0; /* Erroneous and ignoring entries are
147 treated equally for now. */
148 year = hour = min = sec = 0; /* Silence the compiler. */
151 /* While there are tokens on the line, parse them. Next is the
152 number of tokens left until the filename.
154 Use the month-name token as the "anchor" (the place where the
155 position wrt the file name is "known"). When a month name is
156 encountered, `next' is set to 5. Also, the preceding
157 characters are parsed to get the file size.
159 This tactic is quite dubious when it comes to
160 internationalization issues (non-English month names), but it
162 while ((tok = strtok (NULL, " ")))
165 if (next < 0) /* a month name was not encountered */
167 for (i = 0; i < 12; i++)
168 if (!strcmp (tok, months[i]))
170 /* If we got a month, it means the token before it is the
171 size, and the filename is three tokens away. */
177 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
178 cur.size += mul * (*t - '0');
181 /* Something is seriously wrong. */
187 DEBUGP (("month: %s; ", months[month]));
190 else if (next == 4) /* days */
192 if (tok[1]) /* two-digit... */
193 day = 10 * (*tok - '0') + tok[1] - '0';
194 else /* ...or one-digit */
196 DEBUGP (("day: %d; ", day));
200 /* This ought to be either the time, or the year. Let's
203 If we have a number x, it's a year. If we have x:y,
204 it's hours and minutes. If we have x:y:z, z are
207 min = hour = sec = 0;
208 /* We must deal with digits. */
211 /* Suppose it's year. */
212 for (; ISDIGIT (*tok); tok++)
213 year = (*tok - '0') + 10 * year;
216 /* This means these were hours! */
220 /* Get the minutes... */
221 for (; ISDIGIT (*tok); tok++)
222 min = (*tok - '0') + 10 * min;
225 /* ...and the seconds. */
227 for (; ISDIGIT (*tok); tok++)
228 sec = (*tok - '0') + 10 * sec;
233 DEBUGP (("year: %d (no tm); ", year));
235 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
237 else if (next == 2) /* The file name */
242 /* Since the file name may contain a SPC, it is possible
243 for strtok to handle it wrong. */
244 fnlen = strlen (tok);
245 if (fnlen < len - (tok - line))
247 /* So we have a SPC in the file name. Restore the
250 /* If the file is a symbolic link, it should have a
252 if (cur.type == FT_SYMLINK)
254 p = strstr (tok, " -> ");
260 cur.linkto = xstrdup (p + 4);
261 DEBUGP (("link to: %s\n", cur.linkto));
262 /* And separate it from the file name. */
266 /* If we have the filename, add it to the list of files or
268 /* "." and ".." are an exception! */
269 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
271 DEBUGP (("\nIgnoring `.' and `..'; "));
275 /* Some FTP sites choose to have ls -F as their default
276 LIST output, which marks the symlinks with a trailing
277 `@', directory names with a trailing `/' and
278 executables with a trailing `*'. This is no problem
279 unless encountering a symbolic link ending with `@',
280 or an executable ending with `*' on a server without
281 default -F output. I believe these cases are very
283 fnlen = strlen (tok); /* re-calculate `fnlen' */
284 cur.name = (char *)xmalloc (fnlen + 1);
285 memcpy (cur.name, tok, fnlen + 1);
288 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
290 cur.name[fnlen - 1] = '\0';
291 DEBUGP (("trailing `/' on dir.\n"));
293 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
295 cur.name[fnlen - 1] = '\0';
296 DEBUGP (("trailing `@' on link.\n"));
298 else if (cur.type == FT_PLAINFILE
299 && (cur.perms & 0111)
300 && cur.name[fnlen - 1] == '*')
302 cur.name[fnlen - 1] = '\0';
303 DEBUGP (("trailing `*' on exec.\n"));
314 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
321 DEBUGP (("Skipping.\n"));
322 FREE_MAYBE (cur.name);
323 FREE_MAYBE (cur.linkto);
/* Two allocation sites follow: the first one also sets `dir' (the
   list head) and clears both links, the second one hangs a new node
   off l->next.  In both cases the parsed entry `cur' is copied into
   the node with memcpy().  */
330 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
331 memcpy (l, &cur, sizeof (cur));
332 l->prev = l->next = NULL;
337 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
339 memcpy (l, &cur, sizeof (cur));
342 /* Get the current time. */
343 timenow = time (NULL);
344 tnow = localtime (&timenow);
345 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
346 timestruct.tm_sec = sec;
347 timestruct.tm_min = min;
348 timestruct.tm_hour = hour;
349 timestruct.tm_mday = day;
350 timestruct.tm_mon = month;
353 /* Some listings will not specify the year if it is "obvious"
354 that the file was from the previous year. E.g. if today
355 is 97-01-12, and you see a file of Dec 15th, its year is
356 1996, not 1997. Thanks to Vladimir Volovich for
358 if (month > tnow->tm_mon)
359 timestruct.tm_year = tnow->tm_year - 1;
361 timestruct.tm_year = tnow->tm_year;
364 timestruct.tm_year = year;
365 if (timestruct.tm_year >= 1900)
366 timestruct.tm_year -= 1900;
367 timestruct.tm_wday = 0;
368 timestruct.tm_yday = 0;
369 timestruct.tm_isdst = -1;
370 l->tstamp = mktime (×truct); /* store the time-stamp.
   NOTE(review): the argument above looks like a mis-decoded
   `&timestruct' (the `&times' prefix was turned into an HTML
   entity) -- confirm against the pristine file. */
/* Parse the FTP directory listing stored in FILE and return the
   resulting list of fileinfo entries.

   This function is just a stub.  It should actually accept some kind
   of information about the system the listing came from -- e.g.
   FPL_UNIX, FPL_DOS, FPL_NT, FPL_VMS, etc., plus a "guess-me" value
   like FPL_GUESS -- and then call the appropriate parser.

   Since only Unix FTP servers are currently supported, the call is
   forwarded unchanged to ftp_parse_unix_ls() and its result is
   returned as-is.  */
struct fileinfo *
ftp_parse_ls (const char *file)
{
  return ftp_parse_unix_ls (file);
}
393 /* Stuff for creating FTP index. */
/* Return a pointer to a freshly allocated, quoted version of the
   string S.  The characters that are special in HTML are replaced by
   the entities RFC 1866 defines for them:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'

   No other entities are recognized or replaced.  The result is
   allocated with xmalloc(); S itself is not modified.  */
char *
html_quote_string (const char *s)
{
  const char *b = s;
  char *p, *res;
  int i;

  /* Pass through the string, and count the new size.  Every character
     costs one byte (the `i++' in the loop header); the additions
     below account for the remainder of the entity.  */
  for (i = 0; *s; s++, i++)
    {
      if (*s == '&')
        i += 4;                 /* `amp;' */
      else if (*s == '<' || *s == '>')
        i += 3;                 /* `lt;' and `gt;' */
      else if (*s == '\"')
        i += 5;                 /* `quot;' */
    }
  res = (char *)xmalloc (i + 1);

  /* The counting pass left S at the terminator; rewind it before
     copying.  */
  s = b;
  for (p = res; *s; s++)
    {
      switch (*s)
        {
        case '&':
          *p++ = '&';
          *p++ = 'a';
          *p++ = 'm';
          *p++ = 'p';
          *p++ = ';';
          break;
        case '<': case '>':
          *p++ = '&';
          *p++ = (*s == '<' ? 'l' : 'g');
          *p++ = 't';
          *p++ = ';';
          break;
        case '\"':
          *p++ = '&';
          *p++ = 'q';
          *p++ = 'u';
          *p++ = 'o';
          *p++ = 't';
          *p++ = ';';
          break;
        default:
          *p++ = *s;
        }
    }
  *p = '\0';
  return res;
}
457 /* The function creates an HTML index containing references to given
458 directories and files on the appropriate host. The references are
461 ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
465 char *htclfile; /* HTML-clean file name */
/* Overview, as far as the visible statements show: FILE names the
   index to write, U supplies user, passwd, host, port and dir for
   the page title and the link prefix, and F supplies the entries
   (tstamp, type, name, size, linkto).

   NOTE(review): this excerpt has gaps (the embedded original line
   numbers are not contiguous); the header comment above is cut off
   and several statements, including the loop over F, are missing.  */
/* Create the index file; a failure is logged together with
   strerror().  */
469 fp = fopen (file, "wb");
472 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
480 char *tmpu, *tmpp; /* temporary, clean user and passwd */
/* Build the "user[:passwd]@" prefix placed in front of the host in
   every link.  The buffer is sized for the user, an optional ':' plus
   passwd, the '@' and the terminator.  CLEANDUP presumably returns an
   allocated, URL-safe copy -- TODO confirm.  */
482 tmpu = CLEANDUP (u->user);
483 tmpp = u->passwd ? CLEANDUP (u->passwd) : NULL;
484 upwd = (char *)xmalloc (strlen (tmpu)
485 + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
486 sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
/* HTML preamble: doctype, title and heading, then the start of the
   preformatted listing.  NOTE(review): u->dir and u->host are written
   here without HTML quoting.  */
492 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
493 fprintf (fp, "<html>\n<head>\n<title>");
494 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
495 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
496 fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
497 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
503 /* #### Should we translate the months? */
504 static char *months[] = {
505 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
506 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
/* The entry's time stamp is broken down with localtime() and printed
   as year, month name and day, followed by hours and minutes.  */
508 struct tm *ptm = localtime ((time_t *)&f->tstamp);
510 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
513 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
518 fprintf (fp, _("time unknown "));
/* One of the following labels is printed for the entry type.  */
522 fprintf (fp, _("File "));
525 fprintf (fp, _("Directory "));
528 fprintf (fp, _("Link "));
531 fprintf (fp, _("Not sure "));
/* The name is HTML-quoted once and used both inside the href and as
   the link text.  NOTE(review): u->dir goes into the href unquoted,
   and u->port is printed with %hu here but with %d in the title;
   which conversion matches its declared type cannot be seen from
   this excerpt.  */
534 htclfile = html_quote_string (f->name);
535 fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
538 fprintf (fp, "%s", u->dir);
541 fprintf (fp, "%s", htclfile);
542 if (f->type == FT_DIRECTORY)
544 fprintf (fp, "\">%s", htclfile);
545 if (f->type == FT_DIRECTORY)
547 fprintf (fp, "</a> ");
/* Plain files get their size appended, symlinks their target.
   legible() presumably formats the size for display -- TODO
   confirm.  */
548 if (f->type == FT_PLAINFILE)
549 fprintf (fp, _(" (%s bytes)"), legible (f->size));
550 else if (f->type == FT_SYMLINK)
551 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
/* Close the listing and the document.  */
556 fprintf (fp, "</pre>\n</body>\n</html>\n");