1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
32 #include <sys/types.h>
40 /* Converts symbolic permissions to number-style ones, e.g. string
41 rwxr-xr-x to 755. S is read three characters at a time, once per
42 permission triple: `r' in the first position counts 4, `w' in the
second counts 2, and `x' or `s' in the third counts 1. Apart from
treating `s' in the execute position as executable, it knows nothing
of setuid/setgid/sticky. ACLs are ignored.

NOTE(review): the return type, the braces, the declarations of
`perms' and `i', whatever combines the three triples into one value,
and the return statement are not present in this listing -- confirm
against the upstream file before building. */
44 symperms (const char *s)
50 for (i = 0; i < 3; i++, s += 3)
53 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
54 (s[2] == 'x' || s[2] == 's'));
60 /* Convert the Un*x-ish style directory listing stored in FILE to a
61 linked list of fileinfo (system-independent) entries. The contents
62 of FILE are considered to be produced by the standard Unix `ls -la'
63 output (whatever that might be). BSD (no group) and SYSV (with
64 group) listings are handled.
66 The time stamps are stored in a separate variable, time_t
67 compatible (I hope). The timezones are ignored.

NOTE(review): this listing has lost source lines. Braces, several
statements and the declarations of `fp', `timenow', `hour', `min',
`sec', `t', `mul', `p' and `fnlen' are not present here although the
visible code uses them -- restore them from the upstream file before
building. */
68 static struct fileinfo *
69 ftp_parse_unix_ls (const char *file)
72 static const char *months[] = {
73 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
74 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
76 int next, len, i, error, ignore;
77 int year, month, day; /* for time analysis */
79 struct tm timestruct, *tnow;
82 char *line, *tok; /* tokenizer */
83 struct fileinfo *dir, *l, cur; /* list creation */
85 fp = fopen (file, "rb");
88 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
93 /* Line loop to end of file: */
94 while ((line = read_whole_line (fp)))
96 DEBUGP (("%s\n", line));
98 /* Destroy <CR> if there is one. */
99 if (len && line[len - 1] == '\r')
102 /* Skip if total... */
103 if (!strncasecmp (line, "total", 5))
108 /* Get the first token (permissions). */
109 tok = strtok (line, " ");
119 /* Decide whether we deal with a file or a directory. */
123 cur.type = FT_PLAINFILE;
124 DEBUGP (("PLAINFILE; "));
127 cur.type = FT_DIRECTORY;
128 DEBUGP (("DIRECTORY; "));
131 cur.type = FT_SYMLINK;
132 DEBUGP (("SYMLINK; "));
135 cur.type = FT_UNKNOWN;
136 DEBUGP (("UNKOWN; "));
140 cur.perms = symperms (tok + 1);
141 DEBUGP (("perms %0o; ", cur.perms));
143 error = ignore = 0; /* Erroneous and ignored entries are
144 treated equally for now. */
145 year = hour = min = sec = 0; /* Silence the compiler. */
148 /* While there are tokens on the line, parse them. Next is the
149 number of tokens left until the filename.
151 Use the month-name token as the "anchor" (the place where the
152 position wrt the file name is "known"). When a month name is
153 encountered, `next' is set to 5. Also, the preceding
154 characters are parsed to get the file size.
156 This tactic is quite dubious when it comes to
157 internationalization issues (non-English month names). */
159 while ((tok = strtok (NULL, " ")))
162 if (next < 0) /* a month name was not encountered */
164 for (i = 0; i < 12; i++)
165 if (!strcmp (tok, months[i]))
167 /* If we got a month, it means the token before it is the
168 size, and the filename is three tokens away. */
174 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
175 cur.size += mul * (*t - '0');
178 /* Something is seriously wrong. */
184 DEBUGP (("month: %s; ", months[month]));
187 else if (next == 4) /* days */
189 if (tok[1]) /* two-digit... */
190 day = 10 * (*tok - '0') + tok[1] - '0';
191 else /* ...or one-digit */
193 DEBUGP (("day: %d; ", day));
197 /* This ought to be either the time, or the year.
200 If we have a number x, it's a year. If we have x:y,
201 it's hours and minutes. If we have x:y:z, z are seconds. */
204 min = hour = sec = 0;
205 /* We must deal with digits. */
208 /* Suppose it's year. */
209 for (; ISDIGIT (*tok); tok++)
210 year = (*tok - '0') + 10 * year;
213 /* This means these were hours! */
217 /* Get the minutes... */
218 for (; ISDIGIT (*tok); tok++)
219 min = (*tok - '0') + 10 * min;
222 /* ...and the seconds. */
224 for (; ISDIGIT (*tok); tok++)
225 sec = (*tok - '0') + 10 * sec;
230 DEBUGP (("year: %d (no tm); ", year));
232 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
234 else if (next == 2) /* The file name */
239 /* Since the file name may contain a SPC, it is possible
240 for strtok to handle it wrong. */
241 fnlen = strlen (tok);
242 if (fnlen < len - (tok - line))
244 /* So we have a SPC in the file name. Restore the original. */
247 /* If the file is a symbolic link, it should have a ` -> ' in it. */
249 if (cur.type == FT_SYMLINK)
251 p = strstr (tok, " -> ");
257 cur.linkto = xstrdup (p + 4);
258 DEBUGP (("link to: %s\n", cur.linkto));
259 /* And separate it from the file name. */
263 /* If we have the filename, add it to the list of files or directories. */
265 /* "." and ".." are an exception! */
266 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
268 DEBUGP (("\nIgnoring `.' and `..'; "));
272 /* Some FTP sites choose to have ls -F as their default
273 LIST output, which marks the symlinks with a trailing
274 `@', directory names with a trailing `/' and
275 executables with a trailing `*'. This is no problem
276 unless encountering a symbolic link ending with `@',
277 or an executable ending with `*' on a server without
278 default -F output. I believe these cases are very rare. */
280 fnlen = strlen (tok); /* re-calculate `fnlen' */
281 cur.name = (char *)xmalloc (fnlen + 1);
282 memcpy (cur.name, tok, fnlen + 1);
285 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
287 cur.name[fnlen - 1] = '\0';
288 DEBUGP (("trailing `/' on dir.\n"));
290 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
292 cur.name[fnlen - 1] = '\0';
293 DEBUGP (("trailing `@' on link.\n"));
295 else if (cur.type == FT_PLAINFILE
296 && (cur.perms & 0111)
297 && cur.name[fnlen - 1] == '*')
299 cur.name[fnlen - 1] = '\0';
300 DEBUGP (("trailing `*' on exec.\n"));
311 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
318 DEBUGP (("Skipping.\n"));
319 FREE_MAYBE (cur.name);
320 FREE_MAYBE (cur.linkto);
327 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
328 memcpy (l, &cur, sizeof (cur));
329 l->prev = l->next = NULL;
334 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
336 memcpy (l, &cur, sizeof (cur));
339 /* Get the current time. */
340 timenow = time (NULL);
341 tnow = localtime (&timenow);
342 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
343 timestruct.tm_sec = sec;
344 timestruct.tm_min = min;
345 timestruct.tm_hour = hour;
346 timestruct.tm_mday = day;
347 timestruct.tm_mon = month;
350 /* Some listings will not specify the year if it is "obvious"
351 that the file was from the previous year. E.g. if today
352 is 97-01-12, and you see a file of Dec 15th, its year is
353 1996, not 1997. Thanks to Vladimir Volovich for pointing this out. */
355 if (month > tnow->tm_mon)
356 timestruct.tm_year = tnow->tm_year - 1;
358 timestruct.tm_year = tnow->tm_year;
361 timestruct.tm_year = year;
362 if (timestruct.tm_year >= 1900)
363 timestruct.tm_year -= 1900;
364 timestruct.tm_wday = 0;
365 timestruct.tm_yday = 0;
366 timestruct.tm_isdst = -1;
367 l->tstamp = mktime (&timestruct); /* store the time-stamp */
376 /* This function is just a stub. It should actually accept some kind
377 of information about what system it is talking to -- e.g. FPL_UNIX,
378 FPL_DOS, FPL_NT, FPL_VMS, etc. and a "guess-me" value, like
379 FPL_GUESS. Then it would call the appropriate parser to fill up
the list of fileinfo entries.
382 Since we currently support only the Unix FTP servers, this function
383 simply passes FILE on and returns the result of ftp_parse_unix_ls().

NOTE(review): the return type and the enclosing braces are not
present in this listing -- confirm against the upstream file. */
385 ftp_parse_ls (const char *file)
387 return ftp_parse_unix_ls (file);