1 /* Parsing FTP `ls' output.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
32 #include <sys/types.h>
40 /* Converts symbolic permissions to number-style ones, e.g. string
41 rwxr-xr-x to 755. For now, it knows nothing of
42 setuid/setgid/sticky. ACLs are ignored. */
44 symperms (const char *s)
50 for (i = 0; i < 3; i++, s += 3)
53 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
54 (s[2] == 'x' || s[2] == 's'));
60 /* Convert the Un*x-ish style directory listing stored in FILE to a
61 linked list of fileinfo (system-independent) entries. The contents
62 of FILE are considered to be produced by the standard Unix `ls -la'
63 output (whatever that might be). BSD (no group) and SYSV (with
64 group) listings are handled.
66 The time stamps are stored in a separate variable, time_t
67 compatible (I hope). The timezones are ignored. */
68 static struct fileinfo *
69 ftp_parse_unix_ls (const char *file)
72 static const char *months[] = {
73 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
74 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
76 int next, len, i, error, ignore;
77 int year, month, day; /* for time analysis */
79 struct tm timestruct, *tnow;
82 char *line, *tok; /* tokenizer */
83 struct fileinfo *dir, *l, cur; /* list creation */
85 fp = fopen (file, "rb");
88 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
93 /* Line loop to end of file: */
94 while ((line = read_whole_line (fp)))
96 DEBUGP (("%s\n", line));
98 /* Destroy <CR><LF> if present. */
99 if (len && line[len - 1] == '\n')
101 if (len && line[len - 1] == '\r')
104 /* Skip if total... */
105 if (!strncasecmp (line, "total", 5))
110 /* Get the first token (permissions). */
111 tok = strtok (line, " ");
121 /* Decide whether we deal with a file or a directory. */
125 cur.type = FT_PLAINFILE;
126 DEBUGP (("PLAINFILE; "));
129 cur.type = FT_DIRECTORY;
130 DEBUGP (("DIRECTORY; "));
133 cur.type = FT_SYMLINK;
134 DEBUGP (("SYMLINK; "));
137 cur.type = FT_UNKNOWN;
138 DEBUGP (("UNKOWN; "));
142 cur.perms = symperms (tok + 1);
143 DEBUGP (("perms %0o; ", cur.perms));
145 error = ignore = 0; /* Erroneous and ignored entries are
146 treated equally for now. */
147 year = hour = min = sec = 0; /* Silence the compiler. */
150 /* While there are tokens on the line, parse them. Next is the
151 number of tokens left until the filename.
153 Use the month-name token as the "anchor" (the place where the
154 position wrt the file name is "known"). When a month name is
155 encountered, `next' is set to 5. Also, the preceding
156 characters are parsed to get the file size.
158 This tactic is quite dubious when it comes to
159 internationalization issues (non-English month names), but it works for listings that use English month names. */
161 while ((tok = strtok (NULL, " ")))
164 if (next < 0) /* a month name was not encountered */
166 for (i = 0; i < 12; i++)
167 if (!strcmp (tok, months[i]))
169 /* If we got a month, it means the token before it is the
170 size, and the filename is three tokens away. */
176 for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
177 cur.size += mul * (*t - '0');
180 /* Something is seriously wrong. */
186 DEBUGP (("month: %s; ", months[month]));
189 else if (next == 4) /* days */
191 if (tok[1]) /* two-digit... */
192 day = 10 * (*tok - '0') + tok[1] - '0';
193 else /* ...or one-digit */
195 DEBUGP (("day: %d; ", day));
199 /* This ought to be either the time, or the year. Let's tell them apart:
202 if we have a number x, it's a year. If we have x:y,
203 it's hours and minutes. If we have x:y:z, z are seconds. */
206 min = hour = sec = 0;
207 /* We must deal with digits. */
210 /* Suppose it's year. */
211 for (; ISDIGIT (*tok); tok++)
212 year = (*tok - '0') + 10 * year;
215 /* This means these were hours! */
219 /* Get the minutes... */
220 for (; ISDIGIT (*tok); tok++)
221 min = (*tok - '0') + 10 * min;
224 /* ...and the seconds. */
226 for (; ISDIGIT (*tok); tok++)
227 sec = (*tok - '0') + 10 * sec;
232 DEBUGP (("year: %d (no tm); ", year));
234 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
236 else if (next == 2) /* The file name */
241 /* Since the file name may contain a SPC, it is possible
242 for strtok to handle it wrong. */
243 fnlen = strlen (tok);
244 if (fnlen < len - (tok - line))
246 /* So we have a SPC in the file name. Restore the space that strtok replaced with a terminator. */
249 /* If the file is a symbolic link, it should have a " -> " separator after its name. */
251 if (cur.type == FT_SYMLINK)
253 p = strstr (tok, " -> ");
259 cur.linkto = xstrdup (p + 4);
260 DEBUGP (("link to: %s\n", cur.linkto));
261 /* And separate it from the file name. */
265 /* If we have the filename, add it to the list of files or directories. */
267 /* "." and ".." are an exception! */
268 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
270 DEBUGP (("\nIgnoring `.' and `..'; "));
274 /* Some FTP sites choose to have ls -F as their default
275 LIST output, which marks the symlinks with a trailing
276 `@', directory names with a trailing `/' and
277 executables with a trailing `*'. This is no problem
278 unless encountering a symbolic link ending with `@',
279 or an executable ending with `*' on a server without
280 default -F output. I believe these cases are very rare. */
282 fnlen = strlen (tok); /* re-calculate `fnlen' */
283 cur.name = (char *)xmalloc (fnlen + 1);
284 memcpy (cur.name, tok, fnlen + 1);
287 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
289 cur.name[fnlen - 1] = '\0';
290 DEBUGP (("trailing `/' on dir.\n"));
292 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
294 cur.name[fnlen - 1] = '\0';
295 DEBUGP (("trailing `@' on link.\n"));
297 else if (cur.type == FT_PLAINFILE
298 && (cur.perms & 0111)
299 && cur.name[fnlen - 1] == '*')
301 cur.name[fnlen - 1] = '\0';
302 DEBUGP (("trailing `*' on exec.\n"));
313 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
320 DEBUGP (("Skipping.\n"));
321 FREE_MAYBE (cur.name);
322 FREE_MAYBE (cur.linkto);
329 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
330 memcpy (l, &cur, sizeof (cur));
331 l->prev = l->next = NULL;
336 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
338 memcpy (l, &cur, sizeof (cur));
341 /* Get the current time. */
342 timenow = time (NULL);
343 tnow = localtime (&timenow);
344 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
345 timestruct.tm_sec = sec;
346 timestruct.tm_min = min;
347 timestruct.tm_hour = hour;
348 timestruct.tm_mday = day;
349 timestruct.tm_mon = month;
352 /* Some listings will not specify the year if it is "obvious"
353 that the file was from the previous year. E.g. if today
354 is 97-01-12, and you see a file of Dec 15th, its year is
355 1996, not 1997. Thanks to Vladimir Volovich for pointing this out. */
357 if (month > tnow->tm_mon)
358 timestruct.tm_year = tnow->tm_year - 1;
360 timestruct.tm_year = tnow->tm_year;
363 timestruct.tm_year = year;
364 if (timestruct.tm_year >= 1900)
365 timestruct.tm_year -= 1900;
366 timestruct.tm_wday = 0;
367 timestruct.tm_yday = 0;
368 timestruct.tm_isdst = -1;
369 l->tstamp = mktime (&timestruct); /* store the time-stamp */
378 /* This function is just a stub. It should actually accept some kind
379 of information what system it is running on -- e.g. FPL_UNIX,
380 FPL_DOS, FPL_NT, FPL_VMS, etc. and a "guess-me" value, like
381 FPL_GUESS. Then it would call the appropriate parsers to fill up the fileinfo list.
384 Since we currently support only the Unix FTP servers, this function
385 simply returns the result of ftp_parse_unix_ls(). */
387 ftp_parse_ls (const char *file)
389 return ftp_parse_unix_ls (file);