/* Parsing FTP `ls' output.
- Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 2000, 2001
+ Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
-This program is distributed in the hope that it will be useful,
+GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+along with Wget; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
-#include <sys/types.h>
-#include <ctype.h>
#include <errno.h>
+#include <time.h>
#include "wget.h"
#include "utils.h"
#include "ftp.h"
#include "url.h"
+#include "convert.h" /* for html_quote_string prototype */
-/* Undef this if FTPPARSE is not available. In that case, Wget will
- still work with Unix FTP servers, which covers most cases. */
-
-#define HAVE_FTPPARSE
-
-#ifdef HAVE_FTPPARSE
-#include "ftpparse.h"
-#endif
+extern FILE *output_stream;
/* Converts symbolic permissions to number-style ones, e.g. string
rwxr-xr-x to 755. For now, it knows nothing of
}
+/* Cleans a line of text so that it can be consistently parsed.  Destroys
+   <CR> and <LF> in case they occur at the end of the line and replaces
+   all <TAB> characters with <SPACE>.  LINE must be a writable,
+   NUL-terminated string; it is modified in place.  Returns the length
+   of the modified line.  */
+static int
+clean_line (char *line)
+{
+  int len = strlen (line);
+
+  /* Re-check LEN before each look at the last character: a line that
+     consists of a lone "\n" is empty once the newline is gone, and
+     peeking at line[len - 1] would then read before the buffer.  */
+  if (len && line[len - 1] == '\n')
+    line[--len] = '\0';
+  if (len && line[len - 1] == '\r')
+    line[--len] = '\0';
+
+  for (; *line; line++)
+    if (*line == '\t')
+      *line = ' ';
+
+  return len;
+}
+
/* Convert the Un*x-ish style directory listing stored in FILE to a
linked list of fileinfo (system-independent) entries. The contents
of FILE are considered to be produced by the standard Unix `ls -la'
dir = l = NULL;
/* Line loop to end of file: */
- while ((line = read_whole_line (fp)))
+ while ((line = read_whole_line (fp)) != NULL)
{
- DEBUGP (("%s\n", line));
- len = strlen (line);
- /* Destroy <CR><LF> if present. */
- if (len && line[len - 1] == '\n')
- line[--len] = '\0';
- if (len && line[len - 1] == '\r')
- line[--len] = '\0';
-
+ len = clean_line (line);
/* Skip if total... */
if (!strncasecmp (line, "total", 5))
{
break;
default:
cur.type = FT_UNKNOWN;
- DEBUGP (("UNKOWN; "));
+ DEBUGP (("UNKNOWN; "));
break;
}
switch (cur.type)
{
case FT_PLAINFILE:
- cur.perms = 420;
+ cur.perms = 0644;
break;
case FT_DIRECTORY:
- cur.perms = 493;
+ cur.perms = 0755;
break;
default:
- cur.perms = 1023;
+ /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
+ cur.perms = 0644;
}
- DEBUGP (("implicite perms %0o; ", cur.perms));
+ DEBUGP (("implicit perms %0o; ", cur.perms));
}
else
{
This tactic is quite dubious when it comes to
internationalization issues (non-English month names), but it
works for now. */
- while ((tok = strtok (NULL, " ")))
+ while ((tok = strtok (NULL, " ")) != NULL)
{
--next;
if (next < 0) /* a month name was not encountered */
size, and the filename is three tokens away. */
if (i != 12)
{
- char *t = tok - 2;
- long mul = 1;
+ wgint size;
- for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
- cur.size += mul * (*t - '0');
+ /* Back up to the beginning of the previous token
+ and parse it with str_to_wgint. */
+ char *t = tok - 2;
+ while (t > line && ISDIGIT (*t))
+ --t;
if (t == line)
{
- /* Something is seriously wrong. */
+ /* Something has gone wrong during parsing. */
error = 1;
break;
}
+ errno = 0;
+ size = str_to_wgint (t, NULL, 10);
+ if (size == WGINT_MAX && errno == ERANGE)
+ /* Out of range -- ignore the size. #### Should
+ we refuse to start the download? */
+ cur.size = 0;
+ else
+ cur.size = size;
+
month = i;
next = 5;
DEBUGP (("month: %s; ", months[month]));
default -F output. I believe these cases are very
rare. */
fnlen = strlen (tok); /* re-calculate `fnlen' */
- cur.name = (char *)xmalloc (fnlen + 1);
+ cur.name = xmalloc (fnlen + 1);
memcpy (cur.name, tok, fnlen + 1);
if (fnlen)
{
if (error || ignore)
{
DEBUGP (("Skipping.\n"));
- FREE_MAYBE (cur.name);
- FREE_MAYBE (cur.linkto);
+ xfree_null (cur.name);
+ xfree_null (cur.linkto);
xfree (line);
continue;
}
if (!dir)
{
- l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+ l = dir = xnew (struct fileinfo);
memcpy (l, &cur, sizeof (cur));
l->prev = l->next = NULL;
}
else
{
cur.prev = l;
- l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+ l->next = xnew (struct fileinfo);
l = l->next;
memcpy (l, &cur, sizeof (cur));
l->next = NULL;
FILE *fp;
int len;
int year, month, day; /* for time analysis */
- int hour, min, sec;
+ int hour, min;
struct tm timestruct;
char *line, *tok; /* tokenizer */
dir = l = NULL;
/* Line loop to end of file: */
- while ((line = read_whole_line (fp)))
+ while ((line = read_whole_line (fp)) != NULL)
{
- DEBUGP (("%s\n", line));
- len = strlen (line);
- /* Destroy <CR><LF> if present. */
- if (len && line[len - 1] == '\n')
- line[--len] = '\0';
- if (len && line[len - 1] == '\r')
- line[--len] = '\0';
+ len = clean_line (line);
/* Extracting name is a bit of black magic and we have to do it
before `strtok' inserted extra \0 characters in the line
cur.name = xstrdup(tok);
DEBUGP(("Name: '%s'\n", cur.name));
- /* First column: mm-dd-yy */
+ /* First column: mm-dd-yy. Should atoi() on the month fail, January
+ will be assumed. */
tok = strtok(line, "-");
- month = atoi(tok);
+ if (tok == NULL) continue;
+ month = atoi(tok) - 1;
+ if (month < 0) month = 0;
tok = strtok(NULL, "-");
+ if (tok == NULL) continue;
day = atoi(tok);
tok = strtok(NULL, " ");
+ if (tok == NULL) continue;
year = atoi(tok);
/* Assuming the epoch starting at 1.1.1970 */
if (year <= 70) year += 100;
- /* Second column: hh:mm[AP]M */
+ /* Second column: hh:mm[AP]M, listing does not contain value for
+ seconds */
tok = strtok(NULL, ":");
+ if (tok == NULL) continue;
hour = atoi(tok);
tok = strtok(NULL, "M");
+ if (tok == NULL) continue;
min = atoi(tok);
- /* Adjust hour from AM/PM */
+ /* Adjust hour from AM/PM. Just for the record, the sequence goes
+ 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
tok+=2;
+ if (hour == 12) hour = 0;
if (*tok == 'P') hour += 12;
- /* Listing does not contain value for seconds */
- sec = 0;
DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
year+1900, month, day, hour, min));
/* Build the time-stamp (copy & paste from above) */
- timestruct.tm_sec = sec;
+ timestruct.tm_sec = 0;
timestruct.tm_min = min;
timestruct.tm_hour = hour;
timestruct.tm_mday = day;
directories as the listing does not give us a clue) and filetype
here. */
tok = strtok(NULL, " ");
- while (*tok == '\0') tok = strtok(NULL, " ");
+ if (tok == NULL) continue;
+ while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
+ if (tok == NULL) continue;
if (*tok == '<')
{
cur.type = FT_DIRECTORY;
cur.size = 0;
- cur.perms = 493; /* my gcc does not like 0755 ?? */
+ cur.perms = 0755;
DEBUGP(("Directory\n"));
}
else
{
+ wgint size;
cur.type = FT_PLAINFILE;
- cur.size = atoi(tok);
- cur.perms = 420; /* 0664 octal */
- DEBUGP(("File, size %ld bytes\n", cur.size));
+ errno = 0;
+ size = str_to_wgint (tok, NULL, 10);
+ if (size == WGINT_MAX && errno == ERANGE)
+ cur.size = 0; /* overflow */
+ else
+ cur.size = size;
+ cur.perms = 0644;
+ DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
}
cur.linkto = NULL;
/* And put everything into the linked list */
if (!dir)
{
- l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+ l = dir = xnew (struct fileinfo);
memcpy (l, &cur, sizeof (cur));
l->prev = l->next = NULL;
}
else
{
cur.prev = l;
- l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
+ l->next = xnew (struct fileinfo);
l = l->next;
memcpy (l, &cur, sizeof (cur));
l->next = NULL;
}
- xfree(line);
+ xfree (line);
}
fclose(fp);
return dir;
}
+/* Converts VMS symbolic permissions to number-style ones, e.g. string
+   RWED,RWE,RE to 755.  "D" (delete) is taken to be equal to "W"
+   (write).  Inspired by a patch of Stoyan Lekov <lekov@eda.bg>.
+
+   S is scanned up to its terminating NUL: every ',' shifts the bits
+   collected so far three places to the left to make room for the next
+   permission group, and any character other than ',', 'R', 'W', 'D'
+   or 'E' is reported through DEBUGP and otherwise ignored.  An empty
+   string yields 0.  */
+static int
+vmsperms (const char *s)
+{
+  int perms = 0;
-#ifdef HAVE_FTPPARSE
+
+  /* Test *S before looking at it: a do/while loop would step past the
+     terminator of an empty string.  */
+  for (; *s; s++)
+    {
+      switch (*s)
+        {
+        case ',': perms <<= 3; break;
+        case 'R': perms |= 4; break;
+        case 'W': perms |= 2; break;
+        case 'D': perms |= 2; break;
+        case 'E': perms |= 1; break;
+        default: DEBUGP(("wrong VMS permissons!\n"));
+        }
+    }
+  return perms;
+}
-/* This is a "glue function" that connects the ftpparse interface to
- the interface Wget expects. ftpparse is used to parse listings
- from servers other than Unix, like those running VMS or NT. */
static struct fileinfo *
-ftp_parse_nonunix_ls (const char *file)
+ftp_parse_vms_ls (const char *file)
{
FILE *fp;
- int len;
+ /* #### A third copy of more-or-less the same array ? */
+ static const char *months[] = {
+ "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
+ "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
+ };
+ int i;
+ int year, month, day; /* for time analysis */
+ int hour, min, sec;
+ struct tm timestruct;
- char *line; /* tokenizer */
+ char *line, *tok; /* tokenizer */
struct fileinfo *dir, *l, cur; /* list creation */
fp = fopen (file, "rb");
}
dir = l = NULL;
+ /* Skip empty line. */
+ line = read_whole_line (fp);
+ xfree_null (line);
+
+ /* Skip "Directory PUB$DEVICE[PUB]" */
+ line = read_whole_line (fp);
+ xfree_null (line);
+
+ /* Skip empty line. */
+ line = read_whole_line (fp);
+ xfree_null (line);
+
/* Line loop to end of file: */
- while ((line = read_whole_line (fp)))
+ while ((line = read_whole_line (fp)) != NULL)
{
- struct ftpparse fp;
+ char *p;
+ i = clean_line (line);
+ if (!i)
+ {
+ xfree (line);
+ break;
+ }
- DEBUGP (("%s\n", line));
- len = strlen (line);
- /* Destroy <CR><LF> if present. */
- if (len && line[len - 1] == '\n')
- line[--len] = '\0';
- if (len && line[len - 1] == '\r')
- line[--len] = '\0';
+ /* First column: Name. A bit of black magic again. The name may be
+ either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
+ line. Therefore we will first try to get the complete name
+ until the first space character; if it fails, we assume that the name
+ occupies the whole line. After that we search for the version
+ separator ";", we remove it and check the extension of the file;
+ extension .DIR denotes directory. */
+
+ tok = strtok(line, " ");
+ if (tok == NULL) tok = line;
+ DEBUGP(("file name: '%s'\n", tok));
+ for (p = tok ; *p && *p != ';' ; p++);
+ if (*p == ';') *p = '\0';
+ p = tok + strlen(tok) - 4;
+ if (!strcmp(p, ".DIR")) *p = '\0';
+ cur.name = xstrdup(tok);
+ DEBUGP(("Name: '%s'\n", cur.name));
- if (ftpparse(&fp, line, len))
+ /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
+ the file size to zero as the listing does tell us only the size in
+ filesystem blocks - for an integrity check (when mirroring, for
+ example) we would need the size in bytes. */
+
+ if (! *p)
{
- cur.size = fp.size;
- cur.name = (char *)xmalloc (fp.namelen + 1);
- memcpy (cur.name, fp.name, fp.namelen);
- cur.name[fp.namelen] = '\0';
- DEBUGP (("%s\n", cur.name));
- /* No links on non-UNIX systems */
- cur.linkto = NULL;
- /* ftpparse won't tell us correct permisions. So lets just invent
- something. */
- if (fp.flagtrycwd)
- {
- cur.type = FT_DIRECTORY;
- cur.perms = 0755;
- }
- else
- {
- cur.type = FT_PLAINFILE;
- cur.perms = 0644;
- }
- if (!dir)
- {
- l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
- memcpy (l, &cur, sizeof (cur));
- l->prev = l->next = NULL;
- }
- else
- {
- cur.prev = l;
- l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
- l = l->next;
- memcpy (l, &cur, sizeof (cur));
- l->next = NULL;
- }
- l->tstamp = fp.mtime;
+ cur.type = FT_DIRECTORY;
+ cur.size = 0;
+ DEBUGP(("Directory\n"));
+ }
+ else
+ {
+ cur.type = FT_PLAINFILE;
+ DEBUGP(("File\n"));
+ }
+
+ cur.size = 0;
+
+ /* Second column, if exists, or the first column of the next line
+ contain file size in blocks. We will skip it. */
+
+ tok = strtok(NULL, " ");
+ if (tok == NULL)
+ {
+ DEBUGP(("Getting additional line\n"));
+ xfree (line);
+ line = read_whole_line (fp);
+ if (!line)
+ {
+ DEBUGP(("empty line read, leaving listing parser\n"));
+ break;
+ }
+ i = clean_line (line);
+ if (!i)
+ {
+ DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
+ xfree (line);
+ break;
+ }
+ tok = strtok(line, " ");
}
+ DEBUGP(("second token: '%s'\n", tok));
+
+ /* Third/Second column: Date DD-MMM-YYYY. */
+
+ tok = strtok(NULL, "-");
+ if (tok == NULL) continue;
+ DEBUGP(("day: '%s'\n",tok));
+ day = atoi(tok);
+ tok = strtok(NULL, "-");
+ if (!tok)
+ {
+ /* If the server produces garbage like
+ 'EA95_0PS.GZ;1 No privilege for attempted operation'
+ the first strtok(NULL, "-") will return everything until the end
+ of the line and only the next strtok() call will return NULL. */
+ DEBUGP(("nonsense in VMS listing, skipping this line\n"));
+ xfree (line);
+ break;
+ }
+ for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
+ /* Unknown months are mapped to January */
+ month = i % 12 ;
+ tok = strtok (NULL, " ");
+ if (tok == NULL) continue;
+ year = atoi (tok) - 1900;
+ DEBUGP(("date parsed\n"));
+
+ /* Fourth/Third column: Time hh:mm[:ss] */
+ tok = strtok (NULL, " ");
+ if (tok == NULL) continue;
+ min = sec = 0;
+ p = tok;
+ hour = atoi (p);
+ for (; *p && *p != ':'; ++p);
+ if (*p)
+ min = atoi (++p);
+ for (; *p && *p != ':'; ++p);
+ if (*p)
+ sec = atoi (++p);
+
+ DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
+ year+1900, month, day, hour, min, sec));
+
+ /* Build the time-stamp (copy & paste from above) */
+ timestruct.tm_sec = sec;
+ timestruct.tm_min = min;
+ timestruct.tm_hour = hour;
+ timestruct.tm_mday = day;
+ timestruct.tm_mon = month;
+ timestruct.tm_year = year;
+ timestruct.tm_wday = 0;
+ timestruct.tm_yday = 0;
+ timestruct.tm_isdst = -1;
+ cur.tstamp = mktime (&timestruct); /* store the time-stamp */
+
+ DEBUGP(("Timestamp: %ld\n", cur.tstamp));
+
+ /* Skip the fifth column */
+
+ tok = strtok(NULL, " ");
+ if (tok == NULL) continue;
+
+ /* Sixth column: Permissions */
+
+ tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
+ if (tok == NULL) continue;
+ tok = strtok(NULL, ")");
+ if (tok == NULL)
+ {
+ DEBUGP(("confusing VMS permissions, skipping line\n"));
+ xfree (line);
+ continue;
+ }
+ /* Permissions have the format "RWED,RWED,RE" */
+ cur.perms = vmsperms(tok);
+ DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
+
+ cur.linkto = NULL;
+
+ /* And put everything into the linked list */
+ if (!dir)
+ {
+ l = dir = xnew (struct fileinfo);
+ memcpy (l, &cur, sizeof (cur));
+ l->prev = l->next = NULL;
+ }
+ else
+ {
+ cur.prev = l;
+ l->next = xnew (struct fileinfo);
+ l = l->next;
+ memcpy (l, &cur, sizeof (cur));
+ l->next = NULL;
+ }
xfree (line);
}
fclose (fp);
return dir;
}
-#endif
-/* This function switches between the correct parsing routine
- depending on the SYSTEM_TYPE. If system type is ST_UNIX, we use
- our home-grown ftp_parse_unix_ls; otherwise, we use our interface
- to ftpparse, also known as ftp_parse_nonunix_ls. The system type
- should be based on the result of the "SYST" response of the FTP
- server. */
+
+/* This function switches between the correct parsing routine depending on
+ the SYSTEM_TYPE. The system type should be based on the result of the
+ "SYST" response of the FTP server. According to this response we will
+ use one of the three different listing parsers that cover most of the FTP
+ servers used nowadays. */
struct fileinfo *
ftp_parse_ls (const char *file, const enum stype system_type)
switch (system_type)
{
case ST_UNIX:
- return ftp_parse_unix_ls (file, FALSE);
+ return ftp_parse_unix_ls (file, 0);
case ST_WINNT:
{
/* Detect whether the listing is simulating the UNIX format */
{
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
return NULL;
- }
+ }
c = fgetc(fp);
fclose(fp);
/* If the first character of the file is '0'-'9', it's WINNT
format. */
if (c >= '0' && c <='9')
return ftp_parse_winnt_ls (file);
- else
- return ftp_parse_unix_ls (file, TRUE);
+ else
+ return ftp_parse_unix_ls (file, 1);
}
+ case ST_VMS:
+ return ftp_parse_vms_ls (file);
+ case ST_MACOS:
+ return ftp_parse_unix_ls (file, 1);
default:
-#ifdef HAVE_FTPPARSE
- return ftp_parse_nonunix_ls (file);
-#else
- /* #### Maybe log some warning here? */
- return ftp_parse_unix_ls (file);
-#endif
+ logprintf (LOG_NOTQUIET, _("\
+Unsupported listing type, trying Unix listing parser.\n"));
+ return ftp_parse_unix_ls (file, 0);
}
}
\f
directories and files on the appropriate host. The references are
FTP. */
uerr_t
-ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
+ftp_index (const char *file, struct url *u, struct fileinfo *f)
{
FILE *fp;
char *upwd;
char *htclfile; /* HTML-clean file name */
- if (!opt.dfp)
+ if (!output_stream)
{
fp = fopen (file, "wb");
if (!fp)
}
}
else
- fp = opt.dfp;
+ fp = output_stream;
if (u->user)
{
char *tmpu, *tmpp; /* temporary, clean user and passwd */
- tmpu = CLEANDUP (u->user);
- tmpp = u->passwd ? CLEANDUP (u->passwd) : NULL;
- upwd = (char *)xmalloc (strlen (tmpu)
- + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
- sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
+ tmpu = url_escape (u->user);
+ tmpp = u->passwd ? url_escape (u->passwd) : NULL;
+ if (tmpp)
+ upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
+ else
+ upwd = concat_strings (tmpu, "@", (char *) 0);
xfree (tmpu);
- FREE_MAYBE (tmpp);
+ xfree_null (tmpp);
}
else
upwd = xstrdup ("");
fprintf (fp, " ");
if (f->tstamp != -1)
{
- /* #### Should we translate the months? */
- static char *months[] = {
+ /* #### Should we translate the months? Or, even better, use
+ ISO 8601 dates? */
+ static const char *months[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
break;
}
htclfile = html_quote_string (f->name);
- fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
+ fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
if (*u->dir != '/')
putc ('/', fp);
fprintf (fp, "%s", u->dir);
putc ('/', fp);
fprintf (fp, "</a> ");
if (f->type == FT_PLAINFILE)
- fprintf (fp, _(" (%s bytes)"), legible (f->size));
+ fprintf (fp, _(" (%s bytes)"), with_thousand_seps (f->size));
else if (f->type == FT_SYMLINK)
fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
putc ('\n', fp);
}
fprintf (fp, "</pre>\n</body>\n</html>\n");
xfree (upwd);
- if (!opt.dfp)
+ if (!output_stream)
fclose (fp);
else
fflush (fp);