/* Parsing FTP `ls' output.
- Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of Wget.
#include "ftp.h"
#include "url.h"
+/* Undef this if FTPPARSE is not available. In that case, Wget will
+ still work with Unix FTP servers, which covers most cases. */
+
+#define HAVE_FTPPARSE
+
+#ifdef HAVE_FTPPARSE
+#include "ftpparse.h"
+#endif
+
/* Converts symbolic permissions to number-style ones, e.g. string
rwxr-xr-x to 755. For now, it knows nothing of
setuid/setgid/sticky. ACLs are ignored. */
The time stamps are stored in a separate variable, time_t
compatible (I hope). The timezones are ignored. */
static struct fileinfo *
-ftp_parse_unix_ls (const char *file)
+ftp_parse_unix_ls (const char *file, int ignore_perms)
{
FILE *fp;
static const char *months[] = {
/* Skip if total... */
if (!strncasecmp (line, "total", 5))
{
- free (line);
+ xfree (line);
continue;
}
/* Get the first token (permissions). */
tok = strtok (line, " ");
if (!tok)
{
- free (line);
+ xfree (line);
continue;
}
break;
}
- cur.perms = symperms (tok + 1);
- DEBUGP (("perms %0o; ", cur.perms));
+ if (ignore_perms)
+ {
+ switch (cur.type)
+ {
+ case FT_PLAINFILE:
+ cur.perms = 420;
+ break;
+ case FT_DIRECTORY:
+ cur.perms = 493;
+ break;
+ default:
+ cur.perms = 1023;
+ }
+ DEBUGP (("implicite perms %0o; ", cur.perms));
+ }
+ else
+ {
+ cur.perms = symperms (tok + 1);
+ DEBUGP (("perms %0o; ", cur.perms));
+ }
- error = ignore = 0; /* Errnoeous and ignoring entries are
+ error = ignore = 0; /* Erroneous and ignoring entries are
treated equally for now. */
year = hour = min = sec = 0; /* Silence the compiler. */
month = day = 0;
DEBUGP (("Skipping.\n"));
FREE_MAYBE (cur.name);
FREE_MAYBE (cur.linkto);
- free (line);
+ xfree (line);
continue;
}
timestruct.tm_isdst = -1;
l->tstamp = mktime (&timestruct); /* store the time-stamp */
- free (line);
+ xfree (line);
}
fclose (fp);
return dir;
}
-/* This function is just a stub. It should actually accept some kind
- of information what system it is running on -- e.g. FPL_UNIX,
- FPL_DOS, FPL_NT, FPL_VMS, etc. and a "guess-me" value, like
- FPL_GUESS. Then it would call the appropriate parsers to fill up
- fileinfos.
-
- Since we currently support only the Unix FTP servers, this function
- simply returns the result of ftp_parse_unix_ls(). */
-struct fileinfo *
-ftp_parse_ls (const char *file)
+/* Parse a directory listing in the native Windows NT format, whose
+   lines consist of a date (mm-dd-yy), a time (hh:mm followed by AM or
+   PM), either <DIR> or the file size, and finally the file name.
+
+   FILE is the name of a local file holding the listing.  Returns the
+   head of a newly allocated doubly linked list with one fileinfo per
+   parsed line, or NULL if FILE cannot be opened or yields no entries.
+   Lines that are too short, or that lack one of the expected columns,
+   are skipped.  The listing carries neither seconds nor permissions,
+   so seconds are set to zero and permissions are guessed.  */
+static struct fileinfo *
+ftp_parse_winnt_ls (const char *file)
{
- return ftp_parse_unix_ls (file);
+  FILE *fp;
+  int len;
+  int year, month, day;          /* for time analysis */
+  int hour, min, sec;
+  struct tm timestruct;
+
+  char *line;                    /* current listing line */
+  char *mon_tok, *day_tok, *year_tok, *hour_tok, *min_tok, *size_tok;
+  struct fileinfo *dir, *l, cur; /* list creation */
+
+  fp = fopen (file, "rb");
+  if (!fp)
+    {
+      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
+      return NULL;
+    }
+  dir = l = NULL;
+
+  /* Line loop to end of file: */
+  while ((line = read_whole_line (fp)))
+    {
+      DEBUGP (("%s\n", line));
+      len = strlen (line);
+      /* Destroy <CR><LF> if present.  */
+      if (len && line[len - 1] == '\n')
+        line[--len] = '\0';
+      if (len && line[len - 1] == '\r')
+        line[--len] = '\0';
+
+      /* Extracting name is a bit of black magic and we have to do it
+         before `strtok' inserts extra \0 characters in the line
+         string.  For the moment let us just suppose that the name
+         starts at column 39 of the listing.  This way we can also
+         recognize filenames that begin with a series of space
+         characters.  Lines too short to hold a name are dropped, and
+         must be freed before moving on.  */
+      if (len < 40)
+        {
+          xfree (line);
+          continue;
+        }
+      cur.name = xstrdup (line + 39);
+      DEBUGP (("Name: '%s'\n", cur.name));
+
+      /* Split off all fixed columns first, so that a missing column
+         is detected once instead of dereferencing a NULL token:
+         mm-dd-yy, then hh:mm[AP]M, then the size or <DIR>.  The
+         delimiter "M" cuts the minutes token right after the A/P
+         marker.  */
+      mon_tok = strtok (line, "-");
+      day_tok = mon_tok ? strtok (NULL, "-") : NULL;
+      year_tok = day_tok ? strtok (NULL, " ") : NULL;
+      hour_tok = year_tok ? strtok (NULL, ":") : NULL;
+      min_tok = hour_tok ? strtok (NULL, "M") : NULL;
+      size_tok = min_tok ? strtok (NULL, " ") : NULL;
+      if (!size_tok)
+        {
+          DEBUGP (("Skipping.\n"));
+          xfree (cur.name);
+          xfree (line);
+          continue;
+        }
+
+      /* First column: mm-dd-yy.  Should atoi() fail on the month,
+         January is assumed.  */
+      month = atoi (mon_tok);
+      if (month < 1)
+        month = 1;
+      day = atoi (day_tok);
+      year = atoi (year_tok);
+      /* Assuming the epoch starting at 1.1.1970 */
+      if (year <= 70)
+        year += 100;
+
+      /* Second column: hh:mm[AP]M */
+      hour = atoi (hour_tok);
+      min = atoi (min_tok);
+      /* Adjust hour from AM/PM.  The 12-hour clock runs 11:00AM,
+         12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM, so 12 has to
+         be mapped to 0 before the PM offset is added.  The marker is
+         only looked at when the token is long enough to hold it.  */
+      if (strlen (min_tok) >= 3 && (min_tok[2] == 'A' || min_tok[2] == 'P'))
+        {
+          if (hour == 12)
+            hour = 0;
+          if (min_tok[2] == 'P')
+            hour += 12;
+        }
+      /* Listing does not contain value for seconds */
+      sec = 0;
+
+      DEBUGP (("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
+               year + 1900, month, day, hour, min));
+
+      /* Build the time-stamp.  Note that tm_mon counts from zero while
+         the listing counts months from one.  */
+      timestruct.tm_sec = sec;
+      timestruct.tm_min = min;
+      timestruct.tm_hour = hour;
+      timestruct.tm_mday = day;
+      timestruct.tm_mon = month - 1;
+      timestruct.tm_year = year;
+      timestruct.tm_wday = 0;
+      timestruct.tm_yday = 0;
+      timestruct.tm_isdst = -1;
+      cur.tstamp = mktime (&timestruct); /* store the time-stamp */
+
+      DEBUGP (("Timestamp: %ld\n", (long) cur.tstamp));
+
+      /* Third column: Either file length, or <DIR>.  We also set the
+         permissions (guessed as 0644 for plain files and 0755 for
+         directories as the listing does not give us a clue) and
+         filetype here.  */
+      if (*size_tok == '<')
+        {
+          cur.type = FT_DIRECTORY;
+          cur.size = 0;
+          cur.perms = 0755;
+          DEBUGP (("Directory\n"));
+        }
+      else
+        {
+          cur.type = FT_PLAINFILE;
+          cur.size = atol (size_tok);
+          cur.perms = 0644;
+          DEBUGP (("File, size %ld bytes\n", cur.size));
+        }
+
+      cur.linkto = NULL;
+
+      /* And put everything into the linked list */
+      if (!dir)
+        {
+          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+          memcpy (l, &cur, sizeof (cur));
+          l->prev = l->next = NULL;
+        }
+      else
+        {
+          cur.prev = l;
+          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+          l = l->next;
+          memcpy (l, &cur, sizeof (cur));
+          l->next = NULL;
+        }
+
+      xfree (line);
+    }
+
+  fclose (fp);
+  return dir;
}
-\f
-/* Stuff for creating FTP index. */
-/* The function returns the pointer to the malloc-ed quoted version of
- string s. It will recognize and quote numeric and special graphic
- entities, as per RFC1866:
- `&' -> `&amp;'
- `<' -> `&lt;'
- `>' -> `&gt;'
- `"' -> `&quot;'
+#ifdef HAVE_FTPPARSE
+
+/* This is a "glue function" that connects the ftpparse interface to
+   the interface Wget expects.  ftpparse is used to parse listings
+   from servers other than Unix, like those running VMS or NT.
+
+   FILE names a local file holding the raw listing.  Every line for
+   which ftpparse() returns non-zero becomes one entry of the returned
+   doubly linked list; all other lines are skipped.  Returns NULL when
+   FILE cannot be opened or no line produced an entry.  */
- No other entities are recognized or replaced. */
-static char *
-html_quote_string (const char *s)
+static struct fileinfo *
+ftp_parse_nonunix_ls (const char *file)
{
- const char *b = s;
- char *p, *res;
- int i;
+  FILE *fp;
+  int len;
+
+  char *line;                    /* current listing line */
+  struct fileinfo *dir, *l, cur; /* list creation */
- /* Pass through the string, and count the new size. */
- for (i = 0; *s; s++, i++)
+  fp = fopen (file, "rb");
+  if (!fp)
{
- if (*s == '&')
- i += 4; /* `amp;' */
- else if (*s == '<' || *s == '>')
- i += 3; /* `lt;' and `gt;' */
- else if (*s == '\"')
- i += 5; /* `quot;' */
+      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
+      return NULL;
}
- res = (char *)xmalloc (i + 1);
- s = b;
- for (p = res; *s; s++)
+  dir = l = NULL;
+
+  /* Line loop to end of file: */
+  while ((line = read_whole_line (fp)))
{
- switch (*s)
+      /* Result of parsing the current line.  Deliberately not called
+         `fp', so that it does not shadow the listing stream.  */
+      struct ftpparse parsed;
+
+      DEBUGP (("%s\n", line));
+      len = strlen (line);
+      /* Destroy <CR><LF> if present.  */
+      if (len && line[len - 1] == '\n')
+        line[--len] = '\0';
+      if (len && line[len - 1] == '\r')
+        line[--len] = '\0';
+
+      if (ftpparse (&parsed, line, len))
+        {
+          cur.size = parsed.size;
+          /* The name is copied by length and terminated by hand, as
+             ftpparse reports it as a pointer plus a length.  */
+          cur.name = (char *)xmalloc (parsed.namelen + 1);
+          memcpy (cur.name, parsed.name, parsed.namelen);
+          cur.name[parsed.namelen] = '\0';
+          DEBUGP (("%s\n", cur.name));
+          /* No links on non-UNIX systems */
+          cur.linkto = NULL;
+          cur.tstamp = parsed.mtime;
+          /* ftpparse won't tell us correct permissions.  So let's just
+             invent something.  */
+          if (parsed.flagtrycwd)
+            {
+              cur.type = FT_DIRECTORY;
+              cur.perms = 0755;
+            }
+          else
+            {
+              cur.type = FT_PLAINFILE;
+              cur.perms = 0644;
+            }
+          /* Append the entry to the list.  */
+          if (!dir)
+            {
+              l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+              memcpy (l, &cur, sizeof (cur));
+              l->prev = l->next = NULL;
+            }
+          else
+            {
+              cur.prev = l;
+              l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+              l = l->next;
+              memcpy (l, &cur, sizeof (cur));
+              l->next = NULL;
+            }
+        }
+
+      xfree (line);
+    }
+
+  fclose (fp);
+  return dir;
+}
+#endif
+
+/* This function switches between the correct parsing routine
+   depending on the SYSTEM_TYPE.  If system type is ST_UNIX, we use
+   our home-grown ftp_parse_unix_ls.  If it is ST_WINNT, we peek at
+   the first character of the listing: a digit means the native NT
+   format (handled by ftp_parse_winnt_ls), anything else means the
+   server is simulating the Unix format, which is then parsed with
+   the permissions ignored.  Otherwise, we use our interface to
+   ftpparse, also known as ftp_parse_nonunix_ls, or fall back to the
+   Unix parser when ftpparse is not compiled in.  The system type
+   should be based on the result of the "SYST" response of the FTP
+   server.
+
+   FILE is the name of a local file holding the listing.  Returns the
+   list built by the chosen parser, or NULL if FILE cannot be opened
+   for the format check.  */
+
+struct fileinfo *
+ftp_parse_ls (const char *file, const enum stype system_type)
+{
+  switch (system_type)
+    {
+    case ST_UNIX:
+      return ftp_parse_unix_ls (file, FALSE);
+    case ST_WINNT:
+      {
+        /* Detect whether the listing is simulating the UNIX format */
+        FILE *fp;
+        int c;
+        fp = fopen (file, "rb");
+        if (!fp)
{
- case '&':
- *p++ = '&';
- *p++ = 'a';
- *p++ = 'm';
- *p++ = 'p';
- *p++ = ';';
- break;
- case '<': case '>':
- *p++ = '&';
- *p++ = (*s == '<' ? 'l' : 'g');
- *p++ = 't';
- *p++ = ';';
- break;
- case '\"':
- *p++ = '&';
- *p++ = 'q';
- *p++ = 'u';
- *p++ = 'o';
- *p++ = 't';
- *p++ = ';';
- break;
- default:
- *p++ = *s;
- }
+            logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
+            return NULL;
+          }
+        c = fgetc (fp);
+        fclose (fp);
+        /* If the first character of the file is '0'-'9', it's WINNT
+           format.  */
+        if (c >= '0' && c <= '9')
+          return ftp_parse_winnt_ls (file);
+        else
+          return ftp_parse_unix_ls (file, TRUE);
+      }
+    default:
+#ifdef HAVE_FTPPARSE
+      return ftp_parse_nonunix_ls (file);
+#else
+      /* #### Maybe log some warning here? */
+      /* ftp_parse_unix_ls takes two arguments; keep the permissions,
+         exactly as in the ST_UNIX case.  */
+      return ftp_parse_unix_ls (file, FALSE);
+#endif
}
- *p = '\0';
- return res;
}
+\f
+/* Stuff for creating FTP index. */
/* The function creates an HTML index containing references to given
directories and files on the appropriate host. The references are
upwd = (char *)xmalloc (strlen (tmpu)
+ (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
- free (tmpu);
+ xfree (tmpu);
FREE_MAYBE (tmpp);
}
else
else if (f->type == FT_SYMLINK)
fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
putc ('\n', fp);
- free (htclfile);
+ xfree (htclfile);
f = f->next;
}
fprintf (fp, "</pre>\n</body>\n</html>\n");
- free (upwd);
+ xfree (upwd);
if (!opt.dfp)
fclose (fp);
else