Published in <sxs4rnnlklo.fsf@florida.arsdigita.de>.
+2001-11-22 Hrvoje Niksic <hniksic@arsdigita.com>
+
+ * configure.in: Check for strpbrk().
+
2001-05-14 Herold Heiko <Heiko.Herold@previnet.it>
* windows/Makefile.src:
dnl
AC_FUNC_ALLOCA
AC_FUNC_MMAP
-AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp)
+AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp strpbrk)
AC_CHECK_FUNCS(gettimeofday mktime strptime)
AC_CHECK_FUNCS(strerror snprintf vsnprintf select signal symlink access isatty)
AC_CHECK_FUNCS(uname gethostname)
+2001-11-22 Hrvoje Niksic <hniksic@arsdigita.com>
+
+ * utils.c (path_simplify): Don't remove trailing slashes.
+
+ * ftp.c (ftp_get_listing): Use it.
+
+ * utils.c (file_merge): New function.
+
+ * url.c (opt_url): Removed.
+
+ * recur.c (recursive_retrieve): Inline "opt_url" logic.
+
+ * main.c (main): Use xfree(), not free().
+
+ * url.c (rewrite_url_maybe): Renamed to rewrite_shorthand_url.
+
+ * ftp.c (ccon): Move `ccon' typedef here, since it's only used
+ internally.
+
+ * config.h.in: Include a stub for HAVE_STRPBRK.
+
+ * cmpt.c (strpbrk): Include a replacement for systems without
+ strpbrk().
+
+ * ftp.c: Use url_set_dir and url_set_file when modifying the URL.
+
+ * url.c (url_set_dir): New function.
+ (url_set_file): Ditto.
+
+ * ftp-basic.c (ftp_process_type): Process FTP type here; the URL
+ parser makes the URL "params" available, so we can do that in this
+ function.
+
+ * retr.c: Ditto.
+
+ * ftp.c: Ditto; pass the local file information in `ccon'.
+
+	* http.c: Get rid of the ugly kludge that replaced the URL with
+	  the proxy URL when proxy retrieval was requested.  Use a
+ separate parameter to http_loop and gethttp for the proxy URL.
+
+ * http.c: Changed to reflect the fact that local file, proxy, and
+ referer information are no longer stored in struct url. The local
+ file information is passed in `struct hstat' now.
+
+ * url.c: Reworked URL parsing to be more regular. Reencode the
+ URL using reencode_string.
+ Removed non-URL-related information from struct url. This
+ includes fields `proxy', `local', and `referer'.
+
2001-11-22 Jochen Hein <jochen@jochen.org>
* main.c (main): Split the copyright notice for easier
}
#endif /* not HAVE_STRSTR */
+#ifndef HAVE_STRPBRK
+/* Find the first occurrence in S of any character in ACCEPT. */
+char *
+strpbrk (const char *s, const char *accept)
+{
+ while (*s != '\0')
+ {
+ const char *a = accept;
+ while (*a != '\0')
+ if (*a++ == *s)
+ return (char *) s;
+ ++s;
+ }
+
+ return 0;
+}
+#endif /* not HAVE_STRPBRK */
+
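A quick illustration of the semantics the fallback must match (example
values mine, not from the patch):

    const char *s = "host/dir?query";
    char *p = strpbrk (s, "/?@");  /* returns a pointer to the '/', the
                                      first char from ACCEPT found in S */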
#ifndef HAVE_MKTIME
/* From GNU libc 2.0. */
/* Define if you have the strncasecmp function. */
#undef HAVE_STRNCASECMP
+/* Define if you have the strpbrk function. */
+#undef HAVE_STRPBRK
+
/* Define if you have the strptime function. */
#undef HAVE_STRPTIME
int
set_cookie_header_cb (const char *hdr, void *closure)
{
- struct urlinfo *u = (struct urlinfo *)closure;
+ struct url *u = (struct url *)closure;
struct cookie *cookie;
cookies_now = time (NULL);
/* All OK. */
return FTPOK;
}
+
/* Sends the SIZE command to the server, and returns the value in 'size'.
* If an error occurs, size is set to zero. */
uerr_t
/* All OK. */
return FTPOK;
}
+
+/* If URL's params are of the form "type=X", return character X.
+ Otherwise, return 'I' (the default type). */
+char
+ftp_process_type (const char *params)
+{
+ if (params
+ && 0 == strncasecmp (params, "type=", 5)
+ && params[5] != '\0')
+ return TOUPPER (params[5]);
+ else
+ return 'I';
+}
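For illustration, with an FTP URL such as ftp://host/file;type=a the
parser leaves "type=a" in u->params (the ;type= suffix is RFC 1738
syntax), so (example values mine):

    ftp_process_type ("type=a")  => 'A'
    ftp_process_type ("junk")    => 'I'  /* unrecognized: default */
    ftp_process_type (NULL)      => 'I'  /* no params: default */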
directories and files on the appropriate host. The references are
FTP. */
uerr_t
-ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
+ftp_index (const char *file, struct url *u, struct fileinfo *f)
{
FILE *fp;
char *upwd;
extern char ftp_last_respline[];
+typedef struct
+{
+ int st; /* connection status */
+ int cmd; /* command code */
+ struct rbuf rbuf; /* control connection buffer */
+ long dltime; /* time of the download */
+ enum stype rs; /* remote system reported by ftp server */
+ char *id; /* initial directory */
+ char *target; /* target file name */
+} ccon;
+
+
/* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
the string S, and return the number converted to long, if found, 0
otherwise. */
connection to the server. It always closes the data connection,
and closes the control connection in case of error. */
static uerr_t
-getftp (struct urlinfo *u, long *len, long restval, ccon *con)
+getftp (struct url *u, long *len, long restval, ccon *con)
{
int csock, dtsock, res;
uerr_t err;
long expected_bytes = 0L;
assert (con != NULL);
- assert (u->local != NULL);
+ assert (con->target != NULL);
+
/* Debug-check of the sanity of the request by making sure that LIST
and RETR are never both requested (since we can handle only one
     at a time). */
csock = RBUF_FD (&con->rbuf);
else /* cmd & DO_LOGIN */
{
+ char type_char;
+
/* Login to the server: */
/* First: Establish the control connection. */
logputs (LOG_VERBOSE, _("done.\n"));
/* Fifth: Set the FTP type. */
+ type_char = ftp_process_type (u->params);
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "==> TYPE %c ... ", TOUPPER (u->ftp_type));
- err = ftp_type (&con->rbuf, TOUPPER (u->ftp_type));
+ logprintf (LOG_VERBOSE, "==> TYPE %c ... ", type_char);
+ err = ftp_type (&con->rbuf, type_char);
/* FTPRERR, WRITEFAILED, FTPUNKNOWNTYPE */
switch (err)
{
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET,
_("Unknown type `%c', closing control connection.\n"),
- TOUPPER (u->ftp_type));
+ type_char);
CLOSE (csock);
rbuf_uninitialize (&con->rbuf);
return err;
{
logprintf (LOG_NOTQUIET,
_("\nREST failed; will not truncate `%s'.\n"),
- u->local);
+ con->target);
CLOSE (csock);
closeport (dtsock);
rbuf_uninitialize (&con->rbuf);
/* Open the file -- if opt.dfp is set, use it instead. */
if (!opt.dfp || con->cmd & DO_LIST)
{
- mkalldirs (u->local);
+ mkalldirs (con->target);
if (opt.backups)
- rotate_backups (u->local);
+ rotate_backups (con->target);
/* #### Is this correct? */
- chmod (u->local, 0600);
+ chmod (con->target, 0600);
- fp = fopen (u->local, restval ? "ab" : "wb");
+ fp = fopen (con->target, restval ? "ab" : "wb");
if (!fp)
{
- logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
+ logprintf (LOG_NOTQUIET, "%s: %s\n", con->target, strerror (errno));
CLOSE (csock);
rbuf_uninitialize (&con->rbuf);
closeport (dtsock);
if (res == -2)
{
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
- u->local, strerror (errno));
+ con->target, strerror (errno));
CLOSE (csock);
rbuf_uninitialize (&con->rbuf);
return FWRITEERR;
print it out. */
if (opt.server_response && (con->cmd & DO_LIST))
{
- mkalldirs (u->local);
- fp = fopen (u->local, "r");
+ mkalldirs (con->target);
+ fp = fopen (con->target, "r");
if (!fp)
- logprintf (LOG_ALWAYS, "%s: %s\n", u->local, strerror (errno));
+ logprintf (LOG_ALWAYS, "%s: %s\n", con->target, strerror (errno));
else
{
char *line;
This loop either gets commands from con, or (if ON_YOUR_OWN is
set), makes them up to retrieve the file given by the URL. */
static uerr_t
-ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
+ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
{
int count, orig_lp;
long restval, len;
uerr_t err;
struct stat st;
- if (!u->local)
- u->local = url_filename (u);
+ if (!con->target)
+ con->target = url_filename (u);
- if (opt.noclobber && file_exists_p (u->local))
+ if (opt.noclobber && file_exists_p (con->target))
{
logprintf (LOG_VERBOSE,
- _("File `%s' already there, not retrieving.\n"), u->local);
+ _("File `%s' already there, not retrieving.\n"), con->target);
/* If the file is there, we suppose it's retrieved OK. */
return RETROK;
}
/* Remove it if it's a link. */
- remove_link (u->local);
+ remove_link (con->target);
if (!opt.output_document)
- locf = u->local;
+ locf = con->target;
else
locf = opt.output_document;
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- char *hurl = str_url (u->proxy ? u->proxy : u, 1);
+ char *hurl = url_string (u, 1);
char tmp[15];
strcpy (tmp, " ");
if (count > 1)
/* Need to hide the password from the URL. The `if' is here
so that we don't do the needless allocation every
time. */
- char *hurl = str_url (u->proxy ? u->proxy : u, 1);
+ char *hurl = url_string (u, 1);
logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n",
tms, hurl, len, locf, count);
xfree (hurl);
/* Return the directory listing in a reusable format. The directory
   is specified in u->dir. */
uerr_t
-ftp_get_listing (struct urlinfo *u, ccon *con, struct fileinfo **f)
+ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f)
{
uerr_t err;
- char *olocal = u->local;
- char *list_filename, *ofile;
+ char *uf; /* url file name */
+ char *lf; /* list file name */
+ char *old_target = con->target;
con->st &= ~ON_YOUR_OWN;
con->cmd |= (DO_LIST | LEAVE_PENDING);
con->cmd &= ~DO_RETR;
- /* Get the listing filename. */
- ofile = u->file;
- u->file = LIST_FILENAME;
- list_filename = url_filename (u);
- u->file = ofile;
- u->local = list_filename;
- DEBUGP ((_("Using `%s' as listing tmp file.\n"), list_filename));
+
+ /* Find the listing file name. We do it by taking the file name of
+ the URL and replacing the last component with the listing file
+ name. */
+ uf = url_filename (u);
+ lf = file_merge (uf, LIST_FILENAME);
+ xfree (uf);
+ DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
+
+ con->target = lf;
err = ftp_loop_internal (u, NULL, con);
- u->local = olocal;
+ con->target = old_target;
+
if (err == RETROK)
- *f = ftp_parse_ls (list_filename, con->rs);
+ *f = ftp_parse_ls (lf, con->rs);
else
*f = NULL;
if (opt.remove_listing)
{
- if (unlink (list_filename))
+ if (unlink (lf))
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
else
- logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), list_filename);
+ logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), lf);
}
- xfree (list_filename);
+ xfree (lf);
con->cmd &= ~DO_LIST;
return err;
}
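file_merge itself is defined in utils.c and is not part of this hunk;
going by the comment above ("replacing the last component with the
listing file name"), a minimal sketch of it might look like this:

    /* Hypothetical sketch of file_merge: return a fresh string that
       is BASE with its last path component replaced by FILE.  */
    char *
    file_merge (const char *base, const char *file)
    {
      char *result;
      const char *cut = strrchr (base, '/');

      if (!cut)
        return xstrdup (file);    /* BASE has no directory part */

      result = xmalloc (cut - base + 1 + strlen (file) + 1);
      memcpy (result, base, cut - base + 1);      /* keep the '/' */
      strcpy (result + (cut - base + 1), file);
      return result;
    }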
-static uerr_t ftp_retrieve_dirs PARAMS ((struct urlinfo *, struct fileinfo *,
+static uerr_t ftp_retrieve_dirs PARAMS ((struct url *, struct fileinfo *,
ccon *));
-static uerr_t ftp_retrieve_glob PARAMS ((struct urlinfo *, ccon *, int));
+static uerr_t ftp_retrieve_glob PARAMS ((struct url *, ccon *, int));
static struct fileinfo *delelement PARAMS ((struct fileinfo *,
struct fileinfo **));
static void freefileinfo PARAMS ((struct fileinfo *f));
If opt.recursive is set, after all files have been retrieved,
ftp_retrieve_dirs will be called to retrieve the directories. */
static uerr_t
-ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
+ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con)
{
static int depth = 0;
uerr_t err;
- char *olocal, *ofile;
struct fileinfo *orig;
long local_size;
time_t tml;
while (f)
{
+ char *old_target, *ofile;
+
if (downloaded_exceeds_quota ())
{
--depth;
return QUOTEXC;
}
- olocal = u->local;
- ofile = u->file;
- u->file = f->name;
- u->local = url_filename (u);
+ old_target = con->target;
+
+ ofile = xstrdup (u->file);
+ url_set_file (u, f->name);
+
+ con->target = url_filename (u);
err = RETROK;
dlthis = 1;
I'm not implementing it now since files on an FTP server are much
more likely than files on an HTTP server to legitimately have a
.orig suffix. */
- if (!stat (u->local, &st))
+ if (!stat (con->target, &st))
{
int eq_size;
int cor_val;
/* Remote file is older, file sizes can be compared and
are both equal. */
logprintf (LOG_VERBOSE, _("\
-Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
+Remote file no newer than local file `%s' -- not retrieving.\n"), con->target);
dlthis = 0;
}
else if (eq_size)
/* Remote file is newer or sizes cannot be matched */
logprintf (LOG_VERBOSE, _("\
Remote file is newer than local file `%s' -- retrieving.\n\n"),
- u->local);
+ con->target);
}
else
{
struct stat st;
/* Check whether we already have the correct
symbolic link. */
- int rc = lstat (u->local, &st);
+ int rc = lstat (con->target, &st);
if (rc == 0)
{
size_t len = strlen (f->linkto) + 1;
if (S_ISLNK (st.st_mode))
{
char *link_target = (char *)alloca (len);
- size_t n = readlink (u->local, link_target, len);
+ size_t n = readlink (con->target, link_target, len);
if ((n == len - 1)
&& (memcmp (link_target, f->linkto, n) == 0))
{
logprintf (LOG_VERBOSE, _("\
Already have correct symlink %s -> %s\n\n"),
- u->local, f->linkto);
+ con->target, f->linkto);
dlthis = 0;
break;
}
}
}
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
- u->local, f->linkto);
+ con->target, f->linkto);
/* Unlink before creating symlink! */
- unlink (u->local);
- if (symlink (f->linkto, u->local) == -1)
+ unlink (con->target);
+ if (symlink (f->linkto, con->target) == -1)
logprintf (LOG_NOTQUIET, "symlink: %s\n",
strerror (errno));
logputs (LOG_VERBOSE, "\n");
#else /* not HAVE_SYMLINK */
logprintf (LOG_NOTQUIET,
_("Symlinks not supported, skipping symlink `%s'.\n"),
- u->local);
+ con->target);
#endif /* not HAVE_SYMLINK */
}
else /* opt.retr_symlinks */
if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
&& f->tstamp != -1
&& dlthis
- && file_exists_p (u->local))
+ && file_exists_p (con->target))
{
/* #### This code repeats in http.c and ftp.c. Move it to a
function! */
fl = opt.output_document;
}
else
- fl = u->local;
+ fl = con->target;
if (fl)
touch (fl, f->tstamp);
}
else if (f->tstamp == -1)
- logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), u->local);
+ logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), con->target);
if (f->perms && f->type == FT_PLAINFILE && dlthis)
- chmod (u->local, f->perms);
+ chmod (con->target, f->perms);
else
- DEBUGP (("Unrecognized permissions for %s.\n", u->local));
+ DEBUGP (("Unrecognized permissions for %s.\n", con->target));
+
+ xfree (con->target);
+ con->target = old_target;
+
+ url_set_file (u, ofile);
+ xfree (ofile);
- xfree (u->local);
- u->local = olocal;
- u->file = ofile;
/* Break on fatals. */
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
break;
con->cmd &= ~ (DO_CWD | DO_LOGIN);
f = f->next;
- } /* while */
+ }
+
/* We do not want to call ftp_retrieve_dirs here */
if (opt.recursive &&
!(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel))
ftp_retrieve_glob on each directory entry. The function knows
about excluded directories. */
static uerr_t
-ftp_retrieve_dirs (struct urlinfo *u, struct fileinfo *f, ccon *con)
+ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con)
{
- char *odir;
- char *current_container = NULL;
- int current_length = 0;
+ char *container = NULL;
+ int container_size = 0;
for (; f; f = f->next)
{
- int len;
+ int size;
+ char *odir, *newdir;
if (downloaded_exceeds_quota ())
break;
if (f->type != FT_DIRECTORY)
continue;
- odir = u->dir;
- len = strlen (u->dir) + 1 + strlen (f->name) + 1;
+
/* Allocate u->dir off stack, but reallocate only if a larger
- string is needed. */
- if (len > current_length)
- current_container = (char *)alloca (len);
- u->dir = current_container;
+ string is needed. It's a pity there's no "realloca" for an
+ item on the bottom of the stack. */
+ size = strlen (u->dir) + 1 + strlen (f->name) + 1;
+ if (size > container_size)
+ container = (char *)alloca (size);
+ newdir = container;
+
+ odir = u->dir;
if (*odir == '\0'
|| (*odir == '/' && *(odir + 1) == '\0'))
/* If ODIR is empty or just "/", simply append f->name to
ODIR. (In the former case, to preserve u->dir being
relative; in the latter case, to avoid double slash.) */
- sprintf (u->dir, "%s%s", odir, f->name);
+ sprintf (newdir, "%s%s", odir, f->name);
else
/* Else, use a separator. */
- sprintf (u->dir, "%s/%s", odir, f->name);
+ sprintf (newdir, "%s/%s", odir, f->name);
+
DEBUGP (("Composing new CWD relative to the initial directory.\n"));
- DEBUGP ((" odir = '%s'\n f->name = '%s'\n u->dir = '%s'\n\n",
- odir, f->name, u->dir));
- if (!accdir (u->dir, ALLABS))
+ DEBUGP ((" odir = '%s'\n f->name = '%s'\n newdir = '%s'\n\n",
+ odir, f->name, newdir));
+ if (!accdir (newdir, ALLABS))
{
logprintf (LOG_VERBOSE, _("\
-Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
- u->dir = odir;
+Not descending to `%s' as it is excluded/not-included.\n"), newdir);
continue;
}
+
con->st &= ~DONE_CWD;
+
+ odir = xstrdup (u->dir); /* because url_set_dir will free
+ u->dir. */
+ url_set_dir (u, newdir);
ftp_retrieve_glob (u, con, GETALL);
+ url_set_dir (u, odir);
+ xfree (odir);
+
/* Set the time-stamp? */
- u->dir = odir;
}
+
if (opt.quota && opt.downloaded > opt.quota)
return QUOTEXC;
else
get the listing, so that the time-stamp is heeded); if it's GLOBALL,
use globbing; if it's GETALL, download the whole directory. */
static uerr_t
-ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
+ftp_retrieve_glob (struct url *u, ccon *con, int action)
{
struct fileinfo *orig, *start;
uerr_t res;
matchres = fnmatch (u->file, f->name, 0);
if (matchres == -1)
{
- logprintf (LOG_NOTQUIET, "%s: %s\n", u->local,
+ logprintf (LOG_NOTQUIET, "%s: %s\n", con->target,
strerror (errno));
break;
}
of URL. Inherently, its capabilities are limited on what can be
encoded into a URL. */
uerr_t
-ftp_loop (struct urlinfo *u, int *dt)
+ftp_loop (struct url *u, int *dt)
{
ccon con; /* FTP connection */
uerr_t res;
{
char *filename = (opt.output_document
? xstrdup (opt.output_document)
- : (u->local ? xstrdup (u->local)
+ : (con.target ? xstrdup (con.target)
: url_filename (u)));
res = ftp_index (filename, u, f);
if (res == FTPOK && opt.verbose)
CLOSE (RBUF_FD (&con.rbuf));
FREE_MAYBE (con.id);
con.id = NULL;
+ FREE_MAYBE (con.target);
+ con.target = NULL;
return res;
}
uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
-struct urlinfo;
+struct url;
/* File types. */
enum ftype
correct. */
};
-typedef struct
-{
- int st; /* connection status */
- int cmd; /* command code */
- struct rbuf rbuf; /* control connection buffer */
- long dltime; /* time of the download */
- enum stype rs; /* remote system reported by ftp server */
- char *id; /* initial directory */
-} ccon;
-
struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
-uerr_t ftp_loop PARAMS ((struct urlinfo *, int *));
+uerr_t ftp_loop PARAMS ((struct url *, int *));
+
+uerr_t ftp_index (const char *, struct url *, struct fileinfo *);
+
+char ftp_process_type PARAMS ((const char *));
-uerr_t ftp_index (const char *, struct urlinfo *, struct fileinfo *);
#endif /* FTP_H */
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */
int
-accept_domain (struct urlinfo *u)
+accept_domain (struct url *u)
{
assert (u->host != NULL);
if (opt.domains)
#ifndef HOST_H
#define HOST_H
-struct urlinfo;
+struct url;
/* Function declarations */
char *realhost PARAMS ((const char *));
int same_host PARAMS ((const char *, const char *));
-int accept_domain PARAMS ((struct urlinfo *));
+int accept_domain PARAMS ((struct url *));
int sufmatch PARAMS ((const char **, const char *));
char *ftp_getaddress PARAMS ((void));
long dltime; /* time of the download */
int no_truncate; /* whether truncating the file is
forbidden. */
+ const char *referer; /* value of the referer header. */
+ char **local_file; /* local file. */
};
-/* Free the elements of hstat X. */
-#define FREEHSTAT(x) do \
-{ \
- FREE_MAYBE ((x).newloc); \
- FREE_MAYBE ((x).remote_time); \
- FREE_MAYBE ((x).error); \
- (x).newloc = (x).remote_time = (x).error = NULL; \
-} while (0)
+static void
+free_hstat (struct http_stat *hs)
+{
+ FREE_MAYBE (hs->newloc);
+ FREE_MAYBE (hs->remote_time);
+ FREE_MAYBE (hs->error);
+
+ /* Guard against being called twice. */
+ hs->newloc = NULL;
+ hs->remote_time = NULL;
+ hs->error = NULL;
+}
static char *create_authorization_line PARAMS ((const char *, const char *,
const char *, const char *,
response code correctly, it is not used in a sane way. The caller
can do that, though.
- If u->proxy is non-NULL, the URL u will be taken as a proxy URL,
- and u->proxy->url will be given to the proxy server (bad naming,
- I'm afraid). */
+ If PROXY is non-NULL, the connection will be made to the proxy
+ server, and u->url will be requested. */
static uerr_t
-gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
{
- char *request, *type, *command, *path;
+ char *request, *type, *command, *full_path;
char *user, *passwd;
- char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
+ char *pragma_h, *referer, *useragent, *range, *wwwauth;
char *authenticate_h;
char *proxyauth;
char *all_headers;
char *port_maybe;
char *request_keep_alive;
- int sock, hcount, num_written, all_length, remport, statcode;
+ int sock, hcount, num_written, all_length, statcode;
long contlen, contrange;
- struct urlinfo *ou;
+ struct url *conn;
uerr_t err;
FILE *fp;
int auth_tried_already;
/* initialize ssl_ctx on first run */
if (!ssl_ctx)
{
- err=init_ssl (&ssl_ctx);
+ err = init_ssl (&ssl_ctx);
if (err != 0)
{
switch (err)
if (!(*dt & HEAD_ONLY))
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
know the local filename so we can save to it. */
- assert (u->local != NULL);
+ assert (*hs->local_file != NULL);
authenticate_h = 0;
auth_tried_already = 0;
- inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL);
+ inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
again:
/* We need to come back here when the initial attempt to retrieve
hs->remote_time = NULL;
hs->error = NULL;
- /* Which structure to use to retrieve the original URL data. */
- if (u->proxy)
- ou = u->proxy;
- else
- ou = u;
+ /* If we're using a proxy, we will be connecting to the proxy
+ server. */
+ conn = proxy ? proxy : u;
/* First: establish the connection. */
if (inhibit_keep_alive
||
#ifndef HAVE_SSL
- !persistent_available_p (u->host, u->port)
+ !persistent_available_p (conn->host, conn->port)
#else
- !persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
+ !persistent_available_p (conn->host, conn->port,
+ u->scheme == SCHEME_HTTPS)
#endif /* HAVE_SSL */
)
{
- logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
- err = make_connection (&sock, u->host, u->port);
+ logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "),
+ conn->host, conn->port);
+ err = make_connection (&sock, conn->host, conn->port);
switch (err)
{
case HOSTERR:
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", conn->host, herrmsg (h_errno));
return HOSTERR;
break;
case CONSOCKERR:
case CONREFUSED:
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET,
- _("Connection to %s:%hu refused.\n"), u->host, u->port);
+ _("Connection to %s:%hu refused.\n"), conn->host,
+ conn->port);
CLOSE (sock);
return CONREFUSED;
case CONERROR:
break;
}
#ifdef HAVE_SSL
- if (u->scheme == SCHEME_HTTPS)
+ if (conn->scheme == SCHEME_HTTPS)
	if (connect_ssl (&ssl, ssl_ctx, sock) != 0)
{
logputs (LOG_VERBOSE, "\n");
}
else
{
- logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
+ logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
+ conn->host, conn->port);
/* #### pc_last_fd should be accessed through an accessor
function. */
sock = pc_last_fd;
DEBUGP (("Reusing fd %d.\n", sock));
}
- if (u->proxy)
- path = u->proxy->url;
- else
- path = u->path;
-
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
+
referer = NULL;
- if (ou->referer)
+ if (hs->referer)
{
- referer = (char *)alloca (9 + strlen (ou->referer) + 3);
- sprintf (referer, "Referer: %s\r\n", ou->referer);
+ referer = (char *)alloca (9 + strlen (hs->referer) + 3);
+ sprintf (referer, "Referer: %s\r\n", hs->referer);
}
+
if (*dt & SEND_NOCACHE)
pragma_h = "Pragma: no-cache\r\n";
else
pragma_h = "";
+
if (hs->restval)
{
range = (char *)alloca (13 + numdigit (hs->restval) + 4);
sprintf (useragent, "Wget/%s", version_string);
}
/* Construct the authentication, if userid is present. */
- user = ou->user;
- passwd = ou->passwd;
- search_netrc (ou->host, (const char **)&user, (const char **)&passwd, 0);
+ user = u->user;
+ passwd = u->passwd;
+ search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
user = user ? user : opt.http_user;
passwd = passwd ? passwd : opt.http_passwd;
else
{
wwwauth = create_authorization_line (authenticate_h, user, passwd,
- command, ou->path);
+ command, u->path);
}
}
proxyauth = NULL;
- if (u->proxy)
+ if (proxy)
{
char *proxy_user, *proxy_passwd;
/* For normal username and password, URL components override
}
else
{
- proxy_user = u->user;
- proxy_passwd = u->passwd;
+ proxy_user = proxy->user;
+ proxy_passwd = proxy->passwd;
}
- /* #### This is junky. Can't the proxy request, say, `Digest'
- authentication? */
+ /* #### This does not appear right. Can't the proxy request,
+ say, `Digest' authentication? */
if (proxy_user && proxy_passwd)
proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
"Proxy-Authorization");
}
- remhost = ou->host;
- remport = ou->port;
/* String of the form :PORT. Used only for non-standard ports. */
port_maybe = NULL;
- if (1
-#ifdef HAVE_SSL
- && remport != (u->scheme == SCHEME_HTTPS
- ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
-#else
- && remport != DEFAULT_HTTP_PORT
-#endif
- )
+ if (u->port != scheme_default_port (u->scheme))
{
- port_maybe = (char *)alloca (numdigit (remport) + 2);
- sprintf (port_maybe, ":%d", remport);
+ port_maybe = (char *)alloca (numdigit (u->port) + 2);
+ sprintf (port_maybe, ":%d", u->port);
}
if (!inhibit_keep_alive)
request_keep_alive = NULL;
if (opt.cookies)
- cookies = build_cookies_request (ou->host, ou->port, ou->path,
+ cookies = build_cookies_request (u->host, u->port, u->path,
#ifdef HAVE_SSL
- ou->scheme == SCHEME_HTTPS
+ u->scheme == SCHEME_HTTPS
#else
0
#endif
);
+ if (proxy)
+ full_path = xstrdup (u->url);
+ else
+ full_path = url_full_path (u);
+
/* Allocate the memory for the request. */
- request = (char *)alloca (strlen (command) + strlen (path)
+ request = (char *)alloca (strlen (command)
+ + strlen (full_path)
+ strlen (useragent)
- + strlen (remhost)
+ + strlen (u->host)
+ (port_maybe ? strlen (port_maybe) : 0)
+ strlen (HTTP_ACCEPT)
+ (request_keep_alive
Host: %s%s\r\n\
Accept: %s\r\n\
%s%s%s%s%s%s%s%s\r\n",
- command, path, useragent, remhost,
+ command, full_path,
+ useragent, u->host,
port_maybe ? port_maybe : "",
HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
pragma_h,
opt.user_header ? opt.user_header : "");
DEBUGP (("---request begin---\n%s---request end---\n", request));
- /* Free the temporary memory. */
+
+ /* Free the temporary memory. */
FREE_MAYBE (wwwauth);
FREE_MAYBE (proxyauth);
FREE_MAYBE (cookies);
+ xfree (full_path);
/* Send the request to server. */
#ifdef HAVE_SSL
return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
- u->proxy ? "Proxy" : "HTTP");
+ proxy ? "Proxy" : "HTTP");
contlen = contrange = -1;
type = NULL;
statcode = -1;
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
#ifndef HAVE_SSL
- register_persistent (u->host, u->port, sock);
+ register_persistent (conn->host, conn->port, sock);
#else
- register_persistent (u->host, u->port, sock, ssl);
+ register_persistent (conn->host, conn->port, sock, ssl);
#endif /* HAVE_SSL */
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
/* Authorization is required. */
FREE_MAYBE (type);
type = NULL;
- FREEHSTAT (*hs);
+ free_hstat (hs);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
if (auth_tried_already)
text/html file. If some case-insensitive variation on ".htm[l]" isn't
already the file's suffix, tack on ".html". */
{
- char* last_period_in_local_filename = strrchr(u->local, '.');
+ char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
if (last_period_in_local_filename == NULL ||
!(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
strcasecmp(last_period_in_local_filename, ".html") == EQ))
{
- size_t local_filename_len = strlen(u->local);
+ size_t local_filename_len = strlen(*hs->local_file);
- u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
- strcpy(u->local + local_filename_len, ".html");
+ *hs->local_file = xrealloc(*hs->local_file,
+ local_filename_len + sizeof(".html"));
+ strcpy(*hs->local_file + local_filename_len, ".html");
*dt |= ADDED_HTML_EXTENSION;
}
_("\
\n\
Continued download failed on this file, which conflicts with `-c'.\n\
-Refusing to truncate existing file `%s'.\n\n"), u->local);
+Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
CLOSE_INVALIDATE (sock);
/* Open the local file. */
if (!opt.dfp)
{
- mkalldirs (u->local);
+ mkalldirs (*hs->local_file);
if (opt.backups)
- rotate_backups (u->local);
- fp = fopen (u->local, hs->restval ? "ab" : "wb");
+ rotate_backups (*hs->local_file);
+ fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
if (!fp)
{
- logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
+ logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
FREE_MAYBE (all_headers);
/* The genuine HTTP loop! This is the part where the retrieval is
retried, and retried, and retried, and... */
uerr_t
-http_loop (struct urlinfo *u, char **newloc, int *dt)
+http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
+ int *dt, struct url *proxy)
{
int count;
int use_ts, got_head = 0; /* time-stamping info */
size_t filename_len;
struct http_stat hstat; /* HTTP status */
struct stat st;
+ char *dummy = NULL;
/* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
/* Determine the local filename. */
- if (!u->local)
- u->local = url_filename (u->proxy ? u->proxy : u);
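+  /* LOCAL_FILE, when non-NULL, is both input and output: if it
+     already points to a string, that name is used as-is; if it
+     points to NULL, the name chosen here is stored into it.  A NULL
+     LOCAL_FILE means the caller doesn't care about the name, so a
+     local dummy is used instead.  */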
+ if (local_file && *local_file)
+ hstat.local_file = local_file;
+ else if (local_file)
+ {
+ *local_file = url_filename (u);
+ hstat.local_file = local_file;
+ }
+ else
+ {
+ dummy = url_filename (u);
+ hstat.local_file = &dummy;
+ }
if (!opt.output_document)
- locf = u->local;
+ locf = *hstat.local_file;
else
locf = opt.output_document;
- filename_len = strlen (u->local);
+ hstat.referer = referer;
+
+ filename_len = strlen (*hstat.local_file);
filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
- if (opt.noclobber && file_exists_p (u->local))
+ if (opt.noclobber && file_exists_p (*hstat.local_file))
{
/* If opt.noclobber is turned on and file already exists, do not
retrieve the file */
logprintf (LOG_VERBOSE, _("\
-File `%s' already there, will not retrieve.\n"), u->local);
+File `%s' already there, will not retrieve.\n"), *hstat.local_file);
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
/* #### Bogusness alert. */
- /* If its suffix is "html" or (yuck!) "htm", we suppose it's
- text/html, a harmless lie. */
- if (((suf = suffix (u->local)) != NULL)
+ /* If its suffix is "html" or "htm", assume text/html. */
+ if (((suf = suffix (*hstat.local_file)) != NULL)
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
xfree (suf);
- /* Another harmless lie: */
+
+ FREE_MAYBE (dummy);
return RETROK;
}
in url.c. Replacing sprintf with inline calls to
strcpy() and long_to_string() made a difference.
--hniksic */
- memcpy (filename_plus_orig_suffix, u->local, filename_len);
+ memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
memcpy (filename_plus_orig_suffix + filename_len,
".orig", sizeof (".orig"));
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
- if (stat (u->local, &st) == 0)
- local_filename = u->local;
+ if (stat (*hstat.local_file, &st) == 0)
+ local_filename = *hstat.local_file;
if (local_filename != NULL)
/* There was a local file, so we'll check later to see if the version
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- char *hurl = str_url (u->proxy ? u->proxy : u, 1);
+ char *hurl = url_string (u, 1);
char tmp[15];
strcpy (tmp, " ");
if (count > 1)
Some proxies are notorious for caching incomplete data, so
we require a fresh get.
b) caching is explicitly inhibited. */
- if ((u->proxy && count > 1) /* a */
- || !opt.allow_cache /* b */
+ if ((proxy && count > 1) /* a */
+ || !opt.allow_cache /* b */
)
*dt |= SEND_NOCACHE;
else
*dt &= ~SEND_NOCACHE;
- /* Try fetching the document, or at least its head. :-) */
- err = gethttp (u, &hstat, dt);
+ /* Try fetching the document, or at least its head. */
+ err = gethttp (u, &hstat, dt, proxy);
/* It's unfortunate that wget determines the local filename before finding
out the Content-Type of the file. Barring a major restructuring of the
code, we need to re-set locf here, since gethttp() may have xrealloc()d
- u->local to tack on ".html". */
+ *hstat.local_file to tack on ".html". */
if (!opt.output_document)
- locf = u->local;
+ locf = *hstat.local_file;
else
locf = opt.output_document;
/* Non-fatal errors continue executing the loop, which will
bring them to "while" statement at the end, to judge
whether the number of tries was exceeded. */
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
printwhat (count, opt.ntry);
continue;
break;
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return err;
break;
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
- u->local, strerror (errno));
- FREEHSTAT (hstat);
+ *hstat.local_file, strerror (errno));
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return err;
break;
case CONSSLERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return err;
break;
case NEWLOCATION:
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return WRONGCODE;
}
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return NEWLOCATION;
break;
case RETRUNNEEDED:
/* The file was already fully retrieved. */
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return RETROK;
break;
case RETRFINISHED:
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- char *hurl = str_url (u->proxy ? u->proxy : u, 1);
+ char *hurl = url_string (u, 1);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
xfree (hurl);
}
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return WRONGCODE;
}
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
local_filename);
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return RETROK;
}
else if (tml >= tmr)
logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n"));
}
- FREEHSTAT (hstat);
+ free_hstat (&hstat);
continue;
}
if ((tmr != (time_t) (-1))
fl = opt.output_document;
}
else
- fl = u->local;
+ fl = *hstat.local_file;
if (fl)
touch (fl, tmr);
}
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
+ FREE_MAYBE (dummy);
return RETROK;
}
- /* It is now safe to free the remainder of hstat, since the
- strings within it will no longer be used. */
- FREEHSTAT (hstat);
-
tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
if (hstat.len == hstat.contlen)
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return RETROK;
}
else if (hstat.res == 0) /* No read error */
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
_("%s (%s) - Connection closed at byte %ld. "),
tms, tmrate, hstat.len);
printwhat (count, opt.ntry);
+ free_hstat (&hstat);
continue;
}
else if (!opt.kill_longer) /* meaning we got more than expected */
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+ free_hstat (&hstat);
+ FREE_MAYBE (dummy);
return RETROK;
}
else /* the same, but not accepted */
_("%s (%s) - Connection closed at byte %ld/%ld. "),
tms, tmrate, hstat.len, hstat.contlen);
printwhat (count, opt.ntry);
+ free_hstat (&hstat);
continue;
}
}
_("%s (%s) - Read error at byte %ld (%s)."),
tms, tmrate, hstat.len, strerror (errno));
printwhat (count, opt.ntry);
+ free_hstat (&hstat);
continue;
}
else /* hstat.res == -1 and contlen is given */
tms, tmrate, hstat.len, hstat.contlen,
strerror (errno));
printwhat (count, opt.ntry);
+ free_hstat (&hstat);
continue;
}
}
/* Fill in the arguments. */
for (i = 0; i < nurl; i++, optind++)
{
- char *rewritten = rewrite_url_maybe (argv[optind]);
+ char *rewritten = rewrite_shorthand_url (argv[optind]);
if (rewritten)
{
printf ("Converted %s to %s\n", argv[optind], rewritten);
{
convert_all_links ();
}
+
log_close ();
for (i = 0; i < nurl; i++)
- free (url[i]);
+ xfree (url[i]);
cleanup ();
+
#ifdef DEBUG_MALLOC
print_malloc_debug_stats ();
#endif
int dt, inl, dash_p_leaf_HTML = FALSE;
int meta_disallow_follow;
int this_url_ftp; /* See below the explanation */
- uerr_t err;
urlpos *url_list, *cur_url;
- struct urlinfo *u;
+ struct url *u;
assert (this_url != NULL);
assert (file != NULL);
hash_table_clear (undesirable_urls);
string_set_add (undesirable_urls, this_url);
/* Enter this_url to the hash table, in original and "enhanced" form. */
- u = newurl ();
- err = parseurl (this_url, u, 0);
- if (err == URLOK)
+ u = url_parse (this_url, NULL);
+ if (u)
{
string_set_add (undesirable_urls, u->url);
if (opt.no_parent)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
base_dir = NULL;
}
- freeurl (u, 1);
+ url_free (u);
depth = 1;
first_time = 0;
}
break;
/* Parse the URL for convenient use in other functions, as well
as to get the optimized form. It also checks URL integrity. */
- u = newurl ();
- if (parseurl (cur_url->url, u, 0) != URLOK)
+ u = url_parse (cur_url->url, NULL);
+ if (!u)
{
DEBUGP (("Yuck! A bad URL.\n"));
- freeurl (u, 1);
continue;
}
assert (u->url != NULL);
if (!(base_dir && frontcmp (base_dir, u->dir)))
{
/* Failing that, check for parent dir. */
- struct urlinfo *ut = newurl ();
- if (parseurl (this_url, ut, 0) != URLOK)
+ struct url *ut = url_parse (this_url, NULL);
+ if (!ut)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
else if (!frontcmp (ut->dir, u->dir))
{
string_set_add (undesirable_urls, constr);
inl = 1;
}
- freeurl (ut, 1);
+ url_free (ut);
}
}
/* If the file does not match the acceptance list, or is on the
if (!inl)
{
if (!opt.simple_check)
- opt_url (u);
+ {
+ /* Find the "true" host. */
+ char *host = realhost (u->host);
+ xfree (u->host);
+ u->host = host;
+
+ /* Refresh the printed representation of the URL. */
+ xfree (u->url);
+ u->url = url_string (u, 0);
+ }
else
{
char *p;
for (p = u->host; *p; p++)
*p = TOLOWER (*p);
xfree (u->url);
- u->url = str_url (u, 0);
+ u->url = url_string (u, 0);
}
xfree (constr);
constr = xstrdup (u->url);
/* Free filename and constr. */
FREE_MAYBE (filename);
FREE_MAYBE (constr);
- freeurl (u, 1);
+ url_free (u);
/* Increment the pbuf for the appropriate size. */
}
if (opt.convert_links && !opt.delete_after)
char *local_name;
/* The URL must be in canonical form to be compared. */
- struct urlinfo *u = newurl ();
- uerr_t res = parseurl (cur_url->url, u, 0);
- if (res != URLOK)
- {
- freeurl (u, 1);
- continue;
- }
+ struct url *u = url_parse (cur_url->url, NULL);
+ if (!u)
+ continue;
/* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
cur_url->convert = CO_CONVERT_TO_COMPLETE;
cur_url->local_name = NULL;
}
- freeurl (u, 1);
+ url_free (u);
}
/* Convert the links in the file. */
convert_links (html->string, urls);
int global_download_count;
void logflush PARAMS ((void));
-
-/* From http.c. */
-uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
\f
/* Flags for show_progress(). */
enum spflags { SP_NONE, SP_INIT, SP_FINISH };
uerr_t result;
char *url;
int location_changed, dummy;
- int local_use_proxy;
+ int use_proxy;
char *mynewloc, *proxy;
- struct urlinfo *u;
+ struct url *u;
+ int up_error_code; /* url parse error code */
+ char *local_file;
struct hash_table *redirections = NULL;
/* If dt is NULL, just ignore it. */
if (file)
*file = NULL;
- u = newurl ();
- /* Parse the URL. */
- result = parseurl (url, u, 0);
- if (result != URLOK)
+ u = url_parse (url, &up_error_code);
+ if (!u)
{
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
- freeurl (u, 1);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
if (redirections)
string_set_free (redirections);
xfree (url);
- return result;
+ return URLERROR;
}
+ if (!refurl)
+ refurl = opt.referer;
+
redirected:
- /* Set the referer. */
- if (refurl)
- u->referer = xstrdup (refurl);
- else
- {
- if (opt.referer)
- u->referer = xstrdup (opt.referer);
- else
- u->referer = NULL;
- }
+ result = NOCONERROR;
+ mynewloc = NULL;
+ local_file = NULL;
- local_use_proxy = USE_PROXY_P (u);
- if (local_use_proxy)
+ use_proxy = USE_PROXY_P (u);
+ if (use_proxy)
{
- struct urlinfo *pu = newurl ();
-
- /* Copy the original URL to new location. */
- memcpy (pu, u, sizeof (*u));
- pu->proxy = NULL; /* A minor correction :) */
- /* Initialize u to nil. */
- memset (u, 0, sizeof (*u));
- u->proxy = pu;
- /* Get the appropriate proxy server, appropriate for the
- current scheme. */
- proxy = getproxy (pu->scheme);
+ struct url *proxy_url;
+
+ /* Get the proxy server for the current scheme. */
+ proxy = getproxy (u->scheme);
if (!proxy)
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
- freeurl (u, 1);
+ url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
+
/* Parse the proxy URL. */
- result = parseurl (proxy, u, 0);
- if (result != URLOK || u->scheme != SCHEME_HTTP)
+ proxy_url = url_parse (proxy, &up_error_code);
+ if (!proxy_url)
{
- if (u->scheme == SCHEME_HTTP)
- logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
- else
- logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
- freeurl (u, 1);
+	  logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
+		     proxy, url_error (up_error_code));
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ return PROXERR;
+ }
+ if (proxy_url->scheme != SCHEME_HTTP)
+ {
+ logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
+ url_free (proxy_url);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
- u->scheme = SCHEME_HTTP;
- }
-
- mynewloc = NULL;
- if (u->scheme == SCHEME_HTTP
+ result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
+ url_free (proxy_url);
+ }
+ else if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
|| u->scheme == SCHEME_HTTPS
#endif
)
- result = http_loop (u, &mynewloc, dt);
+ {
+ result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
+ }
else if (u->scheme == SCHEME_FTP)
{
/* If this is a redirection, we must not allow recursive FTP
opt.recursive = 0;
result = ftp_loop (u, dt);
opt.recursive = oldrec;
+#if 0
/* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML
according to the suffix. The HTML suffixes are `.html' and
- `.htm', case-insensitive.
-
- #### All of this is, of course, crap. These types should be
- determined through mailcap. */
+ `.htm', case-insensitive. */
if (redirections && u->local && (u->scheme == SCHEME_FTP))
{
char *suf = suffix (u->local);
*dt |= TEXTHTML;
FREE_MAYBE (suf);
}
+#endif
}
location_changed = (result == NEWLOCATION);
if (location_changed)
{
char *construced_newloc;
- uerr_t newloc_result;
- struct urlinfo *newloc_struct;
+ struct url *newloc_struct;
assert (mynewloc != NULL);
+ if (local_file)
+ xfree (local_file);
+
/* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs out
there break the rules and use relative URLs, and popular
mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */
- newloc_struct = newurl ();
- newloc_result = parseurl (mynewloc, newloc_struct, 1);
- if (newloc_result != URLOK)
+ newloc_struct = url_parse (mynewloc, NULL);
+ if (!newloc_struct)
{
- logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
- freeurl (newloc_struct, 1);
- freeurl (u, 1);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, "UNKNOWN");
+ url_free (newloc_struct);
+ url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
string_set_add (redirections, u->url);
}
- /* The new location is OK. Let's check for redirection cycle by
+ /* The new location is OK. Check for redirection cycle by
peeking through the history of redirections. */
if (string_set_contains (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
- freeurl (newloc_struct, 1);
- freeurl (u, 1);
+ url_free (newloc_struct);
+ url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
xfree (url);
url = mynewloc;
- freeurl (u, 1);
+ url_free (u);
u = newloc_struct;
goto redirected;
}
- if (u->local)
+ if (local_file)
{
if (*dt & RETROKF)
{
- register_download (url, u->local);
+ register_download (url, local_file);
if (*dt & TEXTHTML)
- register_html (url, u->local);
+ register_html (url, local_file);
}
}
if (file)
- {
- if (u->local)
- *file = xstrdup (u->local);
- else
- *file = NULL;
- }
- freeurl (u, 1);
+ *file = local_file ? local_file : NULL;
+ else
+ FREE_MAYBE (local_file);
+
+ url_free (u);
if (redirections)
string_set_free (redirections);
void sleep_between_retrievals PARAMS ((int));
+/* Because there's no http.h. */
+
+struct url;
+
+uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *,
+ int *, struct url *));
+
+
#endif /* RETR_H */
struct scheme_data
{
- enum url_scheme scheme;
char *leading_string;
int default_port;
};
/* Supported schemes: */
static struct scheme_data supported_schemes[] =
{
- { SCHEME_HTTP, "http://", DEFAULT_HTTP_PORT },
+ { "http://", DEFAULT_HTTP_PORT },
#ifdef HAVE_SSL
- { SCHEME_HTTPS, "https://", DEFAULT_HTTPS_PORT },
+ { "https://", DEFAULT_HTTPS_PORT },
#endif
- { SCHEME_FTP, "ftp://", DEFAULT_FTP_PORT }
+ { "ftp://", DEFAULT_FTP_PORT },
+
+ /* SCHEME_INVALID */
+ { NULL, -1 }
};
-static void parse_dir PARAMS ((const char *, char **, char **));
-static uerr_t parse_uname PARAMS ((const char *, char **, char **));
static char *construct_relative PARAMS ((const char *, const char *));
-static char process_ftp_type PARAMS ((char *));
\f
/* Support for encoding and decoding of URL strings. We determine
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
-/* rfc1738 reserved chars. We don't use this yet; preservation of
- reserved chars will be implemented when I integrate the new
- `reencode_string' function. */
+/* rfc1738 reserved chars, preserved from encoding. */
#define RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
-/* Unsafe chars:
- - anything <= 32;
- - stuff from rfc1738 ("<>\"#%{}|\\^~[]`");
- - '@' and ':'; needed for encoding URL username and password.
- - anything >= 127. */
+/* rfc1738 unsafe chars, plus some more. */
#define UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)
U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */
- U, 0, U, U, 0, U, R, 0, /* SP ! " # $ % & ' */
+ U, 0, U, RU, 0, U, R, 0, /* SP ! " # $ % & ' */
0, 0, 0, R, 0, 0, 0, R, /* ( ) * + , - . / */
0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
- 0, 0, U, R, U, R, U, R, /* 8 9 : ; < = > ? */
+ 0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */
RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
} \
} while (0)
\f
+enum copy_method { CM_DECODE, CM_ENCODE, CM_PASSTHROUGH };
+
+/* Decide whether to encode, decode, or pass through the char at P.
+ This used to be a macro, but it got a little too convoluted. */
+static inline enum copy_method
+decide_copy_method (const char *p)
+{
+ if (*p == '%')
+ {
+ if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
+ {
+ /* %xx sequence: decode it, unless it would decode to an
+ unsafe or a reserved char; in that case, leave it as
+ is. */
+ char preempt = (XCHAR_TO_XDIGIT (*(p + 1)) << 4) +
+ XCHAR_TO_XDIGIT (*(p + 2));
+
+ if (UNSAFE_CHAR (preempt) || RESERVED_CHAR (preempt))
+ return CM_PASSTHROUGH;
+ else
+ return CM_DECODE;
+ }
+ else
+ /* Garbled %.. sequence: encode `%'. */
+ return CM_ENCODE;
+ }
+ else if (UNSAFE_CHAR (*p) && !RESERVED_CHAR (*p))
+ return CM_ENCODE;
+ else
+ return CM_PASSTHROUGH;
+}
+
+/* Translate a %-quoting (but possibly non-conformant) input string S
+ into a %-quoting (and conformant) output string. If no characters
+ are encoded or decoded, return the same string S; otherwise, return
+ a freshly allocated string with the new contents.
+
+ After a URL has been run through this function, the protocols that
+ use `%' as the quote character can use the resulting string as-is,
+ while those that don't call decode_string() to get to the intended
+ data. This function is also stable: after an input string is
+ transformed the first time, all further transformations of the
+ result yield the same result string.
+
+ Let's discuss why this function is needed.
+
+ Imagine Wget is to retrieve `http://abc.xyz/abc def'. Since a raw
+ space character would mess up the HTTP request, it needs to be
+ quoted, like this:
+
+ GET /abc%20def HTTP/1.0
+
+ So it appears that the unsafe chars need to be quoted, as with
+ encode_string. But what if we're requested to download
+ `abc%20def'? Remember that %-encoding is valid URL syntax, so what
+ the user meant was a literal space, and he was kind enough to quote
+ it. In that case, Wget should obviously leave the `%20' as is, and
+ send the same request as above. So in this case we may not call
+ encode_string.
+
+ But what if the requested URI is `abc%20 def'? If we call
+ encode_string, we end up with `/abc%2520%20def', which is almost
+ certainly not intended. If we don't call encode_string, we are
+ left with the embedded space and cannot send the request. What the
+ user meant was for Wget to request `/abc%20%20def', and this is
+ where reencode_string kicks in.
+
+ Wget used to solve this by first decoding %-quotes, and then
+ encoding all the "unsafe" characters found in the resulting string.
+ This was wrong because it didn't preserve certain URL special
+   (reserved) characters.  For instance, a URI containing "a%2B+b" (0x2b
+ == '+') would get translated to "a%2B%2Bb" or "a++b" depending on
+ whether we considered `+' reserved (it is). One of these results
+ is inevitable because by the second step we would lose information
+ on whether the `+' was originally encoded or not. Both results
+ were wrong because in CGI parameters + means space, while %2B means
+ literal plus. reencode_string correctly translates the above to
+ "a%2B+b", i.e. returns the original string.
+
+ This function uses an algorithm proposed by Anon Sricharoenchai:
+
+ 1. Encode all URL_UNSAFE and the "%" that are not followed by 2
+ hexdigits.
+
+ 2. Decode all "%XX" except URL_UNSAFE, URL_RESERVED (";/?:@=&") and
+ "+".
+
+ ...except that this code conflates the two steps, and decides
+ whether to encode, decode, or pass through each character in turn.
+ The function still uses two passes, but their logic is the same --
+ the first pass exists merely for the sake of allocation. Another
+   small difference is that we add `+' to URL_RESERVED.
+
+ Anon's test case:
+
+ "http://abc.xyz/%20%3F%%36%31%25aa% a?a=%61+a%2Ba&b=b%26c%3Dc"
+ ->
+ "http://abc.xyz/%20%3F%2561%25aa%25%20a?a=a+a%2Ba&b=b%26c%3Dc"
+
+ Simpler test cases:
+
+ "foo bar" -> "foo%20bar"
+ "foo%20bar" -> "foo%20bar"
+ "foo %20bar" -> "foo%20%20bar"
+ "foo%%20bar" -> "foo%25%20bar" (0x25 == '%')
+ "foo%25%20bar" -> "foo%25%20bar"
+ "foo%2%20bar" -> "foo%252%20bar"
+ "foo+bar" -> "foo+bar" (plus is reserved!)
+ "foo%2b+bar" -> "foo%2b+bar" */
+
+char *
+reencode_string (const char *s)
+{
+ const char *p1;
+ char *newstr, *p2;
+ int oldlen, newlen;
+
+ int encode_count = 0;
+ int decode_count = 0;
+
+ /* First, pass through the string to see if there's anything to do,
+ and to calculate the new length. */
+ for (p1 = s; *p1; p1++)
+ {
+ switch (decide_copy_method (p1))
+ {
+ case CM_ENCODE:
+ ++encode_count;
+ break;
+ case CM_DECODE:
+ ++decode_count;
+ break;
+ case CM_PASSTHROUGH:
+ break;
+ }
+ }
+
+ if (!encode_count && !decode_count)
+ /* The string is good as it is. */
+ return (char *)s; /* C const model sucks. */
+
+ oldlen = p1 - s;
+ /* Each encoding adds two characters (hex digits), while each
+ decoding removes two characters. */
+ newlen = oldlen + 2 * (encode_count - decode_count);
+ newstr = xmalloc (newlen + 1);
+
+ p1 = s;
+ p2 = newstr;
+
+ while (*p1)
+ {
+ switch (decide_copy_method (p1))
+ {
+ case CM_ENCODE:
+ {
+	    unsigned char c = *p1++; /* unsigned: avoid sign extension */
+ *p2++ = '%';
+ *p2++ = XDIGIT_TO_XCHAR (c >> 4);
+ *p2++ = XDIGIT_TO_XCHAR (c & 0xf);
+ }
+ break;
+ case CM_DECODE:
+ *p2++ = ((XCHAR_TO_XDIGIT (*(p1 + 1)) << 4)
+ + (XCHAR_TO_XDIGIT (*(p1 + 2))));
+ p1 += 3; /* skip %xx */
+ break;
+ case CM_PASSTHROUGH:
+ *p2++ = *p1++;
+ }
+ }
+ *p2 = '\0';
+ assert (p2 - newstr == newlen);
+ return newstr;
+}
+
+/* Run PTR_VAR through reencode_string. If a new string is consed,
+ free PTR_VAR and make it point to the new storage. Obviously,
+ PTR_VAR needs to be an lvalue. */
+
+#define REENCODE(ptr_var) do { \
+ char *rf_new = reencode_string (ptr_var); \
+ if (rf_new != ptr_var) \
+ { \
+ xfree (ptr_var); \
+ ptr_var = rf_new; \
+ } \
+} while (0)
+\f
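The simpler test cases quoted in the comment above can be checked
mechanically; a throwaway driver (a sketch, assuming it is linked with
url.c) could be:

    #include <stdio.h>

    char *reencode_string (const char *);   /* from url.c */

    int
    main (void)
    {
      static const char *tests[] = {
        "foo bar", "foo%20bar", "foo %20bar", "foo%%20bar",
        "foo%25%20bar", "foo%2%20bar", "foo+bar", "foo%2b+bar"
      };
      int i;

      /* Leaks the consed copies; fine for a one-off check. */
      for (i = 0; i < 8; i++)
        printf ("%-14s -> %s\n", tests[i], reencode_string (tests[i]));
      return 0;
    }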
/* Returns the scheme type if the scheme is supported, or
SCHEME_INVALID if not. */
enum url_scheme
{
int i;
- for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
+ for (i = 0; supported_schemes[i].leading_string; i++)
if (!strncasecmp (url, supported_schemes[i].leading_string,
strlen (supported_schemes[i].leading_string)))
- return supported_schemes[i].scheme;
+ return (enum url_scheme)i;
return SCHEME_INVALID;
}
return *p == ':';
}
+int
+scheme_default_port (enum url_scheme scheme)
+{
+ return supported_schemes[scheme].default_port;
+}
+
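Note that the scheme enum now doubles as an index into
supported_schemes[] (url_scheme above returns the array index cast to
the enum), so the enum definition in url.h -- not shown in this patch
-- must mirror the array order; presumably something like:

    /* Assumed ordering, mirroring supported_schemes[]: */
    enum url_scheme
    {
      SCHEME_HTTP,
    #ifdef HAVE_SSL
      SCHEME_HTTPS,
    #endif
      SCHEME_FTP,
      SCHEME_INVALID
    };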
/* Skip the username and password, if present here. The function
should be called *not* with the complete URL, but with the part
right after the scheme.
url_skip_uname (const char *url)
{
const char *p;
- const char *q = NULL;
- for (p = url ; *p && *p != '/'; p++)
- if (*p == '@') q = p;
- /* If a `@' was found before the first occurrence of `/', skip
- it. */
- if (q != NULL)
- return q - url + 1;
- else
+
+ /* Look for '@' that comes before '/' or '?'. */
+ p = (const char *)strpbrk (url, "/?@");
+ if (!p || *p != '@')
return 0;
+
+ return p - url + 1;
+}
+
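Worked examples for the strpbrk-based logic above (inputs mine):

    url_skip_uname ("user:pass@host/x")  => 10  /* skips "user:pass@" */
    url_skip_uname ("host/a@b")          => 0   /* '/' before '@' */
    url_skip_uname ("host?a@b")          => 0   /* '?' before '@' */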
+static int
+parse_uname (const char *str, int len, char **user, char **passwd)
+{
+ char *colon;
+
+ if (len == 0)
+ /* Empty user name not allowed. */
+ return 0;
+
+ colon = memchr (str, ':', len);
+ if (colon == str)
+ /* Empty user name again. */
+ return 0;
+
+ if (colon)
+ {
+ int pwlen = len - (colon + 1 - str);
+ *passwd = xmalloc (pwlen + 1);
+ memcpy (*passwd, colon + 1, pwlen);
+ (*passwd)[pwlen] = '\0';
+ len -= pwlen + 1;
+ }
+ else
+ *passwd = NULL;
+
+ *user = xmalloc (len + 1);
+ memcpy (*user, str, len);
+ (*user)[len] = '\0';
+
+ return 1;
}
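A hypothetical call, using the "user:pass" span (length 9, excluding
the '@') that url_skip_uname would delimit in ftp://user:pass@host/:

    char *user, *passwd;
    if (parse_uname ("user:pass", 9, &user, &passwd))
      {
        /* user == "user", passwd == "pass", both freshly allocated.
           Without a colon in the span, *passwd is set to NULL. */
      }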
/* Used by main.c: detect URLs written using the "shorthand" URL forms
If the URL needs not or cannot be rewritten, return NULL. */
char *
-rewrite_url_maybe (const char *url)
+rewrite_shorthand_url (const char *url)
{
const char *p;
}
}
\f
-/* Allocate a new urlinfo structure, fill it with default values and
- return a pointer to it. */
-struct urlinfo *
-newurl (void)
-{
- struct urlinfo *u;
+static void parse_path PARAMS ((const char *, char **, char **));
- u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
- memset (u, 0, sizeof (*u));
- u->scheme = SCHEME_INVALID;
- return u;
-}
-
-/* Perform a "deep" free of the urlinfo structure. The structure
- should have been created with newurl, but need not have been used.
- If free_pointer is non-0, free the pointer itself. */
-void
-freeurl (struct urlinfo *u, int complete)
+static char *
+strpbrk_or_eos (const char *s, const char *accept)
{
- assert (u != NULL);
- FREE_MAYBE (u->url);
- FREE_MAYBE (u->host);
- FREE_MAYBE (u->path);
- FREE_MAYBE (u->file);
- FREE_MAYBE (u->dir);
- FREE_MAYBE (u->user);
- FREE_MAYBE (u->passwd);
- FREE_MAYBE (u->local);
- FREE_MAYBE (u->referer);
- if (u->proxy)
- freeurl (u->proxy, 1);
- if (complete)
- xfree (u);
- return;
+ char *p = strpbrk (s, accept);
+ if (!p)
+ p = (char *)s + strlen (s);
+ return p;
}
-\f
-enum url_parse_error {
- PE_UNRECOGNIZED_SCHEME, PE_BAD_PORT
+
+static char *parse_errors[] = {
+#define PE_NO_ERROR 0
+ "No error",
+#define PE_UNRECOGNIZED_SCHEME 1
+ "Unrecognized scheme",
+#define PE_EMPTY_HOST 2
+ "Empty host",
+#define PE_BAD_PORT_NUMBER 3
+ "Bad port number",
+#define PE_INVALID_USER_NAME 4
+ "Invalid user name"
};
-/* Extract the given URL of the form
- (http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
- 1. hostname (terminated with `/' or `:')
- 2. port number (terminated with `/'), or chosen for the scheme
- 3. dirname (everything after hostname)
- Most errors are handled. No allocation is done, you must supply
- pointers to allocated memory.
- ...and a host of other stuff :-)
-
- - Recognizes hostname:dir/file for FTP and
- hostname (:portnum)?/dir/file for HTTP.
- - Parses the path to yield directory and file
- - Parses the URL to yield the username and passwd (if present)
- - Decodes the strings, in case they contain "forbidden" characters
- - Writes the result to struct urlinfo
-
- If the argument STRICT is set, it recognizes only the canonical
- form. */
-uerr_t
-parseurl (const char *url, struct urlinfo *u, int strict)
+#define SETERR(p, v) do { \
+ if (p) \
+ *(p) = (v); \
+} while (0)
+
+/* Parse a URL.
+
+ Return a new struct url if successful, NULL on error. In case of
+ error, and if ERROR is not NULL, also set *ERROR to the appropriate
+ error code. */
+struct url *
+url_parse (const char *url, int *error)
{
- int i, l, abs_ftp;
- int recognizable; /* Recognizable URL is the one where
- the scheme was explicitly named,
- i.e. it wasn't deduced from the URL
- format. */
- uerr_t type = URLUNKNOWN;
-
- DEBUGP (("parseurl (\"%s\") -> ", url));
- recognizable = url_has_scheme (url);
- if (strict && !recognizable)
- return URLUNKNOWN;
- for (i = 0, l = 0; i < ARRAY_SIZE (supported_schemes); i++)
+ struct url *u;
+ const char *p;
+
+ enum url_scheme scheme;
+
+ const char *uname_b, *uname_e;
+ const char *host_b, *host_e;
+ const char *path_b, *path_e;
+ const char *params_b, *params_e;
+ const char *query_b, *query_e;
+ const char *fragment_b, *fragment_e;
+
+ int port;
+ char *user = NULL, *passwd = NULL;
+
+ const char *url_orig = url;
+
+ p = url = reencode_string (url);
+
+ scheme = url_scheme (url);
+ if (scheme == SCHEME_INVALID)
{
- l = strlen (supported_schemes[i].leading_string);
- if (!strncasecmp (supported_schemes[i].leading_string, url, l))
- break;
+ SETERR (error, PE_UNRECOGNIZED_SCHEME);
+ return NULL;
}
- /* If scheme is recognizable, but unsupported, bail out, else
- suppose unknown. */
- if (recognizable && i == ARRAY_SIZE (supported_schemes))
- return URLUNKNOWN;
- else if (i == ARRAY_SIZE (supported_schemes))
- type = URLUNKNOWN;
- else
+
+ p += strlen (supported_schemes[scheme].leading_string);
+ uname_b = p;
+ p += url_skip_uname (p);
+ uname_e = p;
+
+ /* scheme://user:pass@host[:port]... */
+ /* ^ */
+
+ /* We attempt to break down the URL into the components path,
+ params, query, and fragment. They are ordered like this:
+
+ scheme://host[:port][/path][;params][?query][#fragment] */
+
+ params_b = params_e = NULL;
+ query_b = query_e = NULL;
+ fragment_b = fragment_e = NULL;
+
+ host_b = p;
+ p = strpbrk_or_eos (p, ":/;?#");
+ host_e = p;
+
+ if (host_b == host_e)
{
- u->scheme = supported_schemes[i].scheme;
- if (u->scheme == SCHEME_HTTP)
- type = URLHTTP;
-#ifdef HAVE_SSL
- if (u->scheme == SCHEME_HTTPS)
- type = URLHTTPS;
-#endif
- if (u->scheme == SCHEME_FTP)
- type = URLFTP;
+ SETERR (error, PE_EMPTY_HOST);
+ return NULL;
}
- if (type == URLUNKNOWN)
- l = 0;
- /* Allow a username and password to be specified (i.e. just skip
- them for now). */
- if (recognizable)
- l += url_skip_uname (url + l);
- for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
- if (i == l)
- return URLBADHOST;
- /* Get the hostname. */
- u->host = strdupdelim (url + l, url + i);
- DEBUGP (("host %s -> ", u->host));
-
- /* Assume no port has been given. */
- u->port = 0;
- if (url[i] == ':')
+ port = scheme_default_port (scheme);
+ if (*p == ':')
{
- /* We have a colon delimiting the hostname. It could mean that
- a port number is following it, or a directory. */
- if (ISDIGIT (url[++i])) /* A port number */
+ const char *port_b, *port_e, *pp;
+
+ /* scheme://host:port/tralala */
+ /* ^ */
+ ++p;
+ port_b = p;
+ p = strpbrk_or_eos (p, "/;?#");
+ port_e = p;
+
+ if (port_b == port_e)
{
- if (type == URLUNKNOWN)
- {
- type = URLHTTP;
- u->scheme = SCHEME_HTTP;
- }
- for (; url[i] && url[i] != '/'; i++)
- if (ISDIGIT (url[i]))
- u->port = 10 * u->port + (url[i] - '0');
- else
- return URLBADPORT;
- if (!u->port)
- return URLBADPORT;
- DEBUGP (("port %hu -> ", u->port));
+ /* http://host:/whatever */
+ /* ^ */
+ SETERR (error, PE_BAD_PORT_NUMBER);
+ return NULL;
}
- else if (type == URLUNKNOWN) /* or a directory */
+
+ for (port = 0, pp = port_b; pp < port_e; pp++)
{
- type = URLFTP;
- u->scheme = SCHEME_FTP;
+ if (!ISDIGIT (*pp))
+ {
+ /* http://host:12randomgarbage/blah */
+ /* ^ */
+ SETERR (error, PE_BAD_PORT_NUMBER);
+ return NULL;
+ }
+ port = 10 * port + (*pp - '0');
}
- else /* or just a misformed port number */
- return URLBADPORT;
}
- else if (type == URLUNKNOWN)
+
+ if (*p == '/')
{
- type = URLHTTP;
- u->scheme = SCHEME_HTTP;
+ ++p;
+ path_b = p;
+ p = strpbrk_or_eos (p, ";?#");
+ path_e = p;
}
- if (!u->port)
+ else
{
- int ind;
- for (ind = 0; ind < ARRAY_SIZE (supported_schemes); ind++)
- if (supported_schemes[ind].scheme == u->scheme)
- break;
- if (ind == ARRAY_SIZE (supported_schemes))
- return URLUNKNOWN;
- u->port = supported_schemes[ind].default_port;
+      /* No path given; u->path must always exist, so use the empty
+	 string.  */
+ path_b = path_e = p;
}
- /* Some delimiter troubles... */
- if (url[i] == '/' && url[i - 1] != ':')
- ++i;
- if (u->scheme == SCHEME_HTTP)
- while (url[i] && url[i] == '/')
- ++i;
- u->path = (char *)xmalloc (strlen (url + i) + 8);
- strcpy (u->path, url + i);
- if (u->scheme == SCHEME_FTP)
+
+ if (*p == ';')
{
- u->ftp_type = process_ftp_type (u->path);
- /* #### We don't handle type `d' correctly yet. */
- if (!u->ftp_type || TOUPPER (u->ftp_type) == 'D')
- u->ftp_type = 'I';
- DEBUGP (("ftp_type %c -> ", u->ftp_type));
+ ++p;
+ params_b = p;
+ p = strpbrk_or_eos (p, "?#");
+ params_e = p;
}
- DEBUGP (("opath %s -> ", u->path));
- /* Parse the username and password (if existing). */
- parse_uname (url, &u->user, &u->passwd);
- /* Decode the strings, as per RFC 1738. */
- decode_string (u->host);
- decode_string (u->path);
- if (u->user)
- decode_string (u->user);
- if (u->passwd)
- decode_string (u->passwd);
- /* Parse the directory. */
- parse_dir (u->path, &u->dir, &u->file);
- DEBUGP (("dir %s -> file %s -> ", u->dir, u->file));
- /* Simplify the directory. */
- path_simplify (u->dir);
- /* Remove the leading `/' in HTTP. */
- if (u->scheme == SCHEME_HTTP && *u->dir == '/')
- strcpy (u->dir, u->dir + 1);
- DEBUGP (("ndir %s\n", u->dir));
- /* Strip trailing `/'. */
- l = strlen (u->dir);
- if (l > 1 && u->dir[l - 1] == '/')
- u->dir[l - 1] = '\0';
- /* Re-create the path: */
- abs_ftp = (u->scheme == SCHEME_FTP && *u->dir == '/');
- /* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
- abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
- strcpy (u->path, abs_ftp ? "%2F" : "/");
- strcat (u->path, abs_ftp ? (u->dir + 1) : u->dir);
- strcat (u->path, *u->dir ? "/" : "");
- strcat (u->path, u->file);
- ENCODE (u->path);
- DEBUGP (("newpath: %s\n", u->path));
- /* Create the clean URL. */
- u->url = str_url (u, 0);
- return URLOK;
-}
-\f
-/* Special versions of DOTP and DDOTP for parse_dir(). They work like
- DOTP and DDOTP, but they also recognize `?' as end-of-string
- delimiter. This is needed for correct handling of query
- strings. */
-
-#define PD_DOTP(x) ((*(x) == '.') && (!*((x) + 1) || *((x) + 1) == '?'))
-#define PD_DDOTP(x) ((*(x) == '.') && (*(x) == '.') \
- && (!*((x) + 2) || *((x) + 2) == '?'))
-
-/* Build the directory and filename components of the path. Both
- components are *separately* malloc-ed strings! It does not change
- the contents of path.
-
- If the path ends with "." or "..", they are (correctly) counted as
- directories. */
-static void
-parse_dir (const char *path, char **dir, char **file)
-{
- int i, l;
-
- l = urlpath_length (path);
- for (i = l; i && path[i] != '/'; i--);
-
- if (!i && *path != '/') /* Just filename */
+ if (*p == '?')
{
- if (PD_DOTP (path) || PD_DDOTP (path))
- {
- *dir = strdupdelim (path, path + l);
- *file = xstrdup (path + l); /* normally empty, but could
- contain ?... */
- }
- else
- {
- *dir = xstrdup (""); /* This is required because of FTP */
- *file = xstrdup (path);
- }
+ ++p;
+ query_b = p;
+ p = strpbrk_or_eos (p, "#");
+ query_e = p;
}
- else if (!i) /* /filename */
+ if (*p == '#')
{
- if (PD_DOTP (path + 1) || PD_DDOTP (path + 1))
- {
- *dir = strdupdelim (path, path + l);
- *file = xstrdup (path + l); /* normally empty, but could
- contain ?... */
- }
- else
- {
- *dir = xstrdup ("/");
- *file = xstrdup (path + 1);
- }
+ ++p;
+ fragment_b = p;
+ p += strlen (p);
+ fragment_e = p;
}
- else /* Nonempty directory with or without a filename */
+  assert (*p == '\0');
+
+ if (uname_b != uname_e)
{
- if (PD_DOTP (path + i + 1) || PD_DDOTP (path + i + 1))
+ /* http://user:pass@host */
+ /* ^ ^ */
+ /* uname_b uname_e */
+ if (!parse_uname (uname_b, uname_e - uname_b - 1, &user, &passwd))
{
- *dir = strdupdelim (path, path + l);
- *file = xstrdup (path + l); /* normally empty, but could
- contain ?... */
- }
- else
- {
- *dir = strdupdelim (path, path + i);
- *file = xstrdup (path + i + 1);
+ SETERR (error, PE_INVALID_USER_NAME);
+ return NULL;
}
}
+
+ u = (struct url *)xmalloc (sizeof (struct url));
+ memset (u, 0, sizeof (*u));
+
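+  /* If reencode_string() consed fresh storage, hand its ownership to
+     u->url; otherwise store a copy of the caller's string.  */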
+ if (url == url_orig)
+ u->url = xstrdup (url);
+ else
+ u->url = (char *)url;
+
+ u->scheme = scheme;
+ u->host = strdupdelim (host_b, host_e);
+ u->port = port;
+ u->user = user;
+ u->passwd = passwd;
+
+ u->path = strdupdelim (path_b, path_e);
+ path_simplify (u->path);
+
+ if (params_b)
+ u->params = strdupdelim (params_b, params_e);
+ if (query_b)
+ u->query = strdupdelim (query_b, query_e);
+ if (fragment_b)
+ u->fragment = strdupdelim (fragment_b, fragment_e);
+
+ parse_path (u->path, &u->dir, &u->file);
+
+ return u;
}
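
A sketch of the new calling convention (the URL and credentials are made up; error handling is abbreviated):

    int err;
    struct url *u =
      url_parse ("http://joe:pass@host:8080/dir/file;type=a?q=x#top", &err);
    if (!u)
      fprintf (stderr, "%s\n", url_error (err));
    else
      {
        /* u->host = "host", u->port = 8080, u->path = "dir/file",
           u->params = "type=a", u->query = "q=x", u->fragment = "top",
           u->dir = "dir", u->file = "file",
           u->user = "joe", u->passwd = "pass".  */
        url_free (u);
      }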
-/* Find the optional username and password within the URL, as per
- RFC1738. The returned user and passwd char pointers are
- malloc-ed. */
-static uerr_t
-parse_uname (const char *url, char **user, char **passwd)
+const char *
+url_error (int error_code)
{
- int l;
- const char *p, *q, *col;
- char **where;
-
- *user = NULL;
- *passwd = NULL;
-
- /* Look for the end of the scheme identifier. */
- l = url_skip_scheme (url);
- if (!l)
- return URLUNKNOWN;
- url += l;
- /* Is there an `@' character? */
- for (p = url; *p && *p != '/'; p++)
- if (*p == '@')
- break;
- /* If not, return. */
- if (*p != '@')
- return URLOK;
- /* Else find the username and password. */
- for (p = q = col = url; *p && *p != '/'; p++)
- {
- if (*p == ':' && !*user)
- {
- *user = (char *)xmalloc (p - url + 1);
- memcpy (*user, url, p - url);
- (*user)[p - url] = '\0';
- col = p + 1;
- }
- if (*p == '@') q = p;
- }
- /* Decide whether you have only the username or both. */
- where = *user ? passwd : user;
- *where = (char *)xmalloc (q - col + 1);
- memcpy (*where, col, q - col);
- (*where)[q - col] = '\0';
- return URLOK;
+ assert (error_code >= 0 && error_code < ARRAY_SIZE (parse_errors));
+ return parse_errors[error_code];
}
-/* If PATH ends with `;type=X', return the character X. */
-static char
-process_ftp_type (char *path)
+static void
+parse_path (const char *quoted_path, char **dir, char **file)
{
- int len = strlen (path);
+ char *path, *last_slash;
+
+ STRDUP_ALLOCA (path, quoted_path);
+ decode_string (path);
- if (len >= 7
- && !memcmp (path + len - 7, ";type=", 6))
+ last_slash = strrchr (path, '/');
+ if (!last_slash)
{
- path[len - 7] = '\0';
- return path[len - 1];
+ *dir = xstrdup ("");
+ *file = xstrdup (path);
}
else
- return '\0';
+ {
+ *dir = strdupdelim (path, last_slash);
+ *file = xstrdup (last_slash + 1);
+ }
}
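
For instance (assuming decode_string() unquotes %XX sequences as per RFC 1738):

    char *dir, *file;
    parse_path ("foo%20bar/baz.html", &dir, &file);
    /* dir = "foo bar", file = "baz.html", both freshly allocated.  */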
-\f
-/* Recreate the URL string from the data in urlinfo. This can be used
- to create a "canonical" representation of the URL. If `hide' is
- non-zero (as it is when we're calling this on a URL we plan to
- print, but not when calling it to canonicalize a URL for use within
- the program), password will be hidden. The forbidden characters in
- the URL will be cleansed. */
+
+/* Note: URL's "full path" is the path with the query string and
+ params appended. The "fragment" (#foo) is intentionally ignored,
+ but that might be changed. For example, if the original URL was
+ "http://host:port/foo/bar/baz;bullshit?querystring#uselessfragment",
+ the full path will be "/foo/bar/baz;bullshit?querystring". */
+
+/* Return the length of the full path, without the terminating
+ zero. */
+
+static int
+full_path_length (const struct url *url)
+{
+ int len = 0;
+
+#define FROB(el) if (url->el) len += 1 + strlen (url->el)
+
+ FROB (path);
+ FROB (params);
+ FROB (query);
+
+#undef FROB
+
+ return len;
+}
+
+/* Write out the full path. */
+
+static void
+full_path_write (const struct url *url, char *where)
+{
+#define FROB(el, chr) do { \
+ char *f_el = url->el; \
+ if (f_el) { \
+ int l = strlen (f_el); \
+ *where++ = chr; \
+ memcpy (where, f_el, l); \
+ where += l; \
+ } \
+} while (0)
+
+ FROB (path, '/');
+ FROB (params, ';');
+ FROB (query, '?');
+
+#undef FROB
+}
+
+/* Public function for getting the "full path". */
char *
-str_url (const struct urlinfo *u, int hide)
+url_full_path (const struct url *url)
{
- char *res, *host, *user, *passwd, *scheme_name, *dir, *file;
- int i, l, ln, lu, lh, lp, lf, ld;
- unsigned short default_port;
+ int length = full_path_length (url);
+  char *full_path = (char *)xmalloc (length + 1);
- /* Look for the scheme. */
- for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
- if (supported_schemes[i].scheme == u->scheme)
- break;
- if (i == ARRAY_SIZE (supported_schemes))
- return NULL;
- scheme_name = supported_schemes[i].leading_string;
- default_port = supported_schemes[i].default_port;
- host = encode_string (u->host);
- dir = encode_string (u->dir);
- file = encode_string (u->file);
- user = passwd = NULL;
- if (u->user)
- user = encode_string (u->user);
- if (u->passwd)
- {
- if (hide)
- /* Don't output the password, or someone might see it over the user's
- shoulder (or in saved wget output). Don't give away the number of
- characters in the password, either, as we did in past versions of
- this code, when we replaced the password characters with 'x's. */
- passwd = xstrdup("<password>");
- else
- passwd = encode_string (u->passwd);
- }
- if (u->scheme == SCHEME_FTP && *dir == '/')
- {
- char *tmp = (char *)xmalloc (strlen (dir) + 3);
- /*sprintf (tmp, "%%2F%s", dir + 1);*/
- tmp[0] = '%';
- tmp[1] = '2';
- tmp[2] = 'F';
- strcpy (tmp + 3, dir + 1);
- xfree (dir);
- dir = tmp;
- }
+ full_path_write (url, full_path);
+ full_path[length] = '\0';
- ln = strlen (scheme_name);
- lu = user ? strlen (user) : 0;
- lp = passwd ? strlen (passwd) : 0;
- lh = strlen (host);
- ld = strlen (dir);
- lf = strlen (file);
- res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
- /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", scheme_name,
- (user ? user : ""), (passwd ? ":" : ""),
- (passwd ? passwd : ""), (user ? "@" : ""),
- host, u->port, dir, *dir ? "/" : "", file); */
- l = 0;
- memcpy (res, scheme_name, ln);
- l += ln;
- if (user)
+ return full_path;
+}
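
Reusing the example from the note above (u is an already-parsed struct url *):

    char *fp = url_full_path (u);
    /* fp = "/foo/bar/baz;bullshit?querystring"; the caller frees it.  */
    xfree (fp);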
+
+/* Sync u->path and u->url with u->dir and u->file. */
+static void
+sync_path (struct url *url)
+{
+ char *newpath;
+
+ xfree (url->path);
+
+ if (!*url->dir)
{
- memcpy (res + l, user, lu);
- l += lu;
- if (passwd)
- {
- res[l++] = ':';
- memcpy (res + l, passwd, lp);
- l += lp;
- }
- res[l++] = '@';
+ newpath = xstrdup (url->file);
+ REENCODE (newpath);
}
- memcpy (res + l, host, lh);
- l += lh;
- if (u->port != default_port)
+ else
{
- res[l++] = ':';
- long_to_string (res + l, (long)u->port);
- l += numdigit (u->port);
+ int dirlen = strlen (url->dir);
+ int filelen = strlen (url->file);
+
+ newpath = xmalloc (dirlen + 1 + filelen + 1);
+ memcpy (newpath, url->dir, dirlen);
+ newpath[dirlen] = '/';
+ memcpy (newpath + dirlen + 1, url->file, filelen);
+ newpath[dirlen + 1 + filelen] = '\0';
+ REENCODE (newpath);
}
- res[l++] = '/';
- memcpy (res + l, dir, ld);
- l += ld;
- if (*dir)
- res[l++] = '/';
- strcpy (res + l, file);
- xfree (host);
- xfree (dir);
- xfree (file);
- FREE_MAYBE (user);
- FREE_MAYBE (passwd);
- return res;
+
+ url->path = newpath;
+
+ /* Synchronize u->url. */
+ xfree (url->url);
+ url->url = url_string (url, 0);
}
-/* Check whether two URL-s are equivalent, i.e. pointing to the same
- location. Uses parseurl to parse them, and compares the canonical
- forms.
+/* Mutators. Code in ftp.c insists on changing u->dir and u->file.
+ This way we can sync u->path and u->url when they get changed. */
- Returns 1 if the URL1 is equivalent to URL2, 0 otherwise. Also
- return 0 on error. */
-/* Do not compile unused code. */
-#if 0
-int
-url_equal (const char *url1, const char *url2)
+void
+url_set_dir (struct url *url, const char *newdir)
{
- struct urlinfo *u1, *u2;
- uerr_t err;
- int res;
+ xfree (url->dir);
+ url->dir = xstrdup (newdir);
+ sync_path (url);
+}
- u1 = newurl ();
- err = parseurl (url1, u1, 0);
- if (err != URLOK)
- {
- freeurl (u1, 1);
- return 0;
- }
- u2 = newurl ();
- err = parseurl (url2, u2, 0);
- if (err != URLOK)
- {
- freeurl (u1, 1);
- freeurl (u2, 1);
- return 0;
- }
- res = !strcmp (u1->url, u2->url);
- freeurl (u1, 1);
- freeurl (u2, 1);
- return res;
+void
+url_set_file (struct url *url, const char *newfile)
+{
+ xfree (url->file);
+ url->file = xstrdup (newfile);
+ sync_path (url);
+}
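
So a caller such as ftp.c can now write (a sketch; the file name is made up):

    url_set_file (u, ".listing");
    /* u->file, u->path, and u->url are all consistent again.  */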
+
+void
+url_free (struct url *url)
+{
+ xfree (url->host);
+ xfree (url->path);
+ xfree (url->url);
+
+ FREE_MAYBE (url->params);
+ FREE_MAYBE (url->query);
+ FREE_MAYBE (url->fragment);
+ FREE_MAYBE (url->user);
+ FREE_MAYBE (url->passwd);
+ FREE_MAYBE (url->dir);
+ FREE_MAYBE (url->file);
+
+ xfree (url);
}
-#endif /* 0 */
\f
urlpos *
get_urls_file (const char *file)
/* Return the path name of the URL-equivalent file name, with a
remote-like structure of directories. */
static char *
-mkstruct (const struct urlinfo *u)
+mkstruct (const struct url *u)
{
char *host, *dir, *file, *res, *dirpref;
int l;
- assert (u->dir != NULL);
- assert (u->host != NULL);
-
if (opt.cut_dirs)
{
char *ptr = u->dir + (*u->dir == '/');
return res;
}
-/* Return a malloced copy of S, but protect any '/' characters. */
+/* Compose a file name out of BASE, an unescaped file name, and QUERY,
+ an escaped query string. The trick is to make sure that unsafe
+ characters in BASE are escaped, and that slashes in QUERY are also
+ escaped. */
static char *
-file_name_protect_query_string (const char *s)
+compose_file_name (char *base, char *query)
{
- const char *from;
- char *to, *dest;
- int destlen = 0;
- for (from = s; *from; from++)
+ char result[256];
+ char *from;
+ char *to = result;
+
+ /* Copy BASE to RESULT and encode all unsafe characters. */
+ from = base;
+ while (*from && to - result < sizeof (result))
{
- ++destlen;
- if (*from == '/')
- destlen += 2; /* each / gets replaced with %2F, so
- it adds two more chars. */
+ if (UNSAFE_CHAR (*from))
+ {
+ const unsigned char c = *from++;
+ *to++ = '%';
+ *to++ = XDIGIT_TO_XCHAR (c >> 4);
+ *to++ = XDIGIT_TO_XCHAR (c & 0xf);
+ }
+ else
+ *to++ = *from++;
}
- dest = (char *)xmalloc (destlen + 1);
- for (from = s, to = dest; *from; from++)
+
+ if (query && to - result < sizeof (result))
{
- if (*from != '/')
- *to++ = *from;
- else
+ *to++ = '?';
+
+ /* Copy QUERY to RESULT and encode all '/' characters. */
+ from = query;
+ while (*from && to - result < sizeof (result))
{
- *to++ = '%';
- *to++ = '2';
- *to++ = 'F';
+ if (*from == '/')
+ {
+ *to++ = '%';
+ *to++ = '2';
+ *to++ = 'F';
+ ++from;
+ }
+ else
+ *to++ = *from++;
}
}
- assert (to - dest == destlen);
- *to = '\0';
- return dest;
+
+ if (to - result < sizeof (result))
+ *to = '\0';
+ else
+ /* Truncate input which is too long, presumably due to a huge
+ query string. */
+ result[sizeof (result) - 1] = '\0';
+
+ return xstrdup (result);
}
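
For example (assuming UNSAFE_CHAR treats the space as unsafe):

    char *fn = compose_file_name ("my file.html", "q=a/b");
    /* fn = "my%20file.html?q=a%2Fb" -- the slash in the query is
       escaped so that no spurious directory is created.  */
    xfree (fn);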
/* Create a unique filename, corresponding to a given URL. Calls
mkstruct if necessary. Does *not* actually create any directories. */
char *
-url_filename (const struct urlinfo *u)
+url_filename (const struct url *u)
{
char *file, *name;
int have_prefix = 0; /* whether we must prepend opt.dir_prefix */
}
else
{
- if (!*u->file)
- file = xstrdup ("index.html");
- else
- {
- /* If the URL came with a query string, u->file will contain
- a question mark followed by query string contents. These
- contents can contain '/' which would make us create
- unwanted directories. These slashes must be protected
- explicitly. */
- if (!strchr (u->file, '/'))
- file = xstrdup (u->file);
- else
- {
- /*assert (strchr (u->file, '?') != NULL);*/
- file = file_name_protect_query_string (u->file);
- }
- }
+ char *base = *u->file ? u->file : "index.html";
+ char *query = u->query && *u->query ? u->query : NULL;
+ file = compose_file_name (base, query);
}
if (!have_prefix)
return uri_merge_1 (base, link, strlen (link), !url_has_scheme (link));
}
\f
-/* Optimize URL by host, destructively replacing u->host with realhost
- (u->host). Do this regardless of opt.simple_check. */
-void
-opt_url (struct urlinfo *u)
+#define APPEND(p, s) do { \
+ int len = strlen (s); \
+ memcpy (p, s, len); \
+ p += len; \
+} while (0)
+
+/* Use this in place of the password when the actual password is
+   supposed to be hidden.  We intentionally use a fixed generic
+   string rather than giving away the number of characters in the
+   password, as previous versions did.  */
+#define HIDDEN_PASSWORD "*password*"
+
+/* Recreate the URL string from the data in URL.
+
+ If HIDE is non-zero (as it is when we're calling this on a URL we
+ plan to print, but not when calling it to canonicalize a URL for
+ use within the program), password will be hidden. Unsafe
+ characters in the URL will be quoted. */
+
+char *
+url_string (const struct url *url, int hide_password)
{
- /* Find the "true" host. */
- char *host = realhost (u->host);
- xfree (u->host);
- u->host = host;
- assert (u->dir != NULL); /* the URL must have been parsed */
- /* Refresh the printed representation. */
- xfree (u->url);
- u->url = str_url (u, 0);
+ int size;
+ char *result, *p;
+ char *quoted_user = NULL, *quoted_passwd = NULL;
+
+ int scheme_port = supported_schemes[url->scheme].default_port;
+ char *scheme_str = supported_schemes[url->scheme].leading_string;
+ int fplen = full_path_length (url);
+
+ assert (scheme_str != NULL);
+
+ /* Make sure the user name and password are quoted. */
+ if (url->user)
+ {
+ quoted_user = encode_string_maybe (url->user);
+ if (url->passwd)
+ {
+ if (hide_password)
+ quoted_passwd = HIDDEN_PASSWORD;
+ else
+ quoted_passwd = encode_string_maybe (url->passwd);
+ }
+ }
+
+ size = (strlen (scheme_str)
+ + strlen (url->host)
+ + fplen
+ + 1);
+ if (url->port != scheme_port)
+ size += 1 + numdigit (url->port);
+ if (quoted_user)
+ {
+ size += 1 + strlen (quoted_user);
+ if (quoted_passwd)
+ size += 1 + strlen (quoted_passwd);
+ }
+
+ p = result = xmalloc (size);
+
+ APPEND (p, scheme_str);
+ if (quoted_user)
+ {
+ APPEND (p, quoted_user);
+ if (quoted_passwd)
+ {
+ *p++ = ':';
+ APPEND (p, quoted_passwd);
+ }
+ *p++ = '@';
+ }
+
+ APPEND (p, url->host);
+ if (url->port != scheme_port)
+ {
+ *p++ = ':';
+ long_to_string (p, url->port);
+ p += strlen (p);
+ }
+
+ full_path_write (url, p);
+ p += fplen;
+ *p++ = '\0';
+
+ assert (p - result == size);
+
+ if (quoted_user && quoted_user != url->user)
+ xfree (quoted_user);
+ if (quoted_passwd && !hide_password
+ && quoted_passwd != url->passwd)
+ xfree (quoted_passwd);
+
+ return result;
}
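
Continuing the url_parse() sketch from earlier:

    char *s = url_string (u, 1);
    /* s = "http://joe:*password*@host:8080/dir/file;type=a?q=x" --
       the password is masked and the fragment is omitted.  */
    xfree (s);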
\f
/* Returns proxy host address, in accordance with SCHEME. */
return NULL;
/* Handle shorthands. */
- rewritten_url = rewrite_url_maybe (proxy);
+ rewritten_url = rewrite_shorthand_url (proxy);
if (rewritten_url)
{
strncpy (rewritten_storage, rewritten_url, sizeof(rewritten_storage));
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443
+/* Note: the values of this enum must appear in the same order as the
+   entries of `supported_schemes' in url.c; url_scheme() relies on
+   that correspondence.  */
+
enum url_scheme {
SCHEME_HTTP,
#ifdef HAVE_SSL
};
/* Structure containing info on a URL. */
-struct urlinfo
+struct url
{
- char *url; /* Unchanged URL */
+ char *url; /* Original URL */
enum url_scheme scheme; /* URL scheme */
char *host; /* Extracted hostname */
- unsigned short port;
- char ftp_type;
- char *path, *dir, *file, *qstring;
- /* Path, dir, file, and query string
- (properly decoded) */
- char *user, *passwd; /* Username and password */
- struct urlinfo *proxy; /* The exact string to pass to proxy
- server */
- char *referer; /* The source from which the request
- URI was obtained */
- char *local; /* The local filename of the URL
- document */
+ int port; /* Port number */
+
+ /* URL components (URL-quoted). */
+ char *path;
+ char *params;
+ char *query;
+ char *fragment;
+
+ /* Extracted path info (unquoted). */
+ char *dir;
+ char *file;
+
+ /* Username and password (unquoted). */
+ char *user;
+ char *passwd;
};
enum convert_options {
char *encode_string PARAMS ((const char *));
-struct urlinfo *newurl PARAMS ((void));
-void freeurl PARAMS ((struct urlinfo *, int));
-enum url_scheme url_detect_scheme PARAMS ((const char *));
+struct url *url_parse PARAMS ((const char *, int *));
+const char *url_error PARAMS ((int));
+char *url_full_path PARAMS ((const struct url *));
+void url_set_dir PARAMS ((struct url *, const char *));
+void url_set_file PARAMS ((struct url *, const char *));
+void url_free PARAMS ((struct url *));
+
+enum url_scheme url_scheme PARAMS ((const char *));
int url_skip_scheme PARAMS ((const char *));
int url_has_scheme PARAMS ((const char *));
+int scheme_default_port PARAMS ((enum url_scheme));
+
int url_skip_uname PARAMS ((const char *));
-uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
-char *str_url PARAMS ((const struct urlinfo *, int));
-/* url_equal is not currently used. */
-#if 0
-int url_equal PARAMS ((const char *, const char *));
-#endif /* 0 */
+char *url_string PARAMS ((const struct url *, int));
urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *));
-char *url_filename PARAMS ((const struct urlinfo *));
-void opt_url PARAMS ((struct urlinfo *));
+char *url_filename PARAMS ((const struct url *));
char *getproxy PARAMS ((uerr_t));
int no_proxy_match PARAMS ((const char *, const char **));
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
-char *rewrite_url_maybe PARAMS ((const char *));
+char *rewrite_shorthand_url PARAMS ((const char *));
#endif /* URL_H */
ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
return output;
}
-
-/* Returns an error message for ERRNUM. #### This requires more work.
- This function, as well as the whole error system, is very
- ill-conceived. */
-const char *
-uerrmsg (uerr_t errnum)
-{
- switch (errnum)
- {
- case URLUNKNOWN:
- return _("Unknown/unsupported protocol");
- break;
- case URLBADPORT:
- return _("Invalid port specification");
- break;
- case URLBADHOST:
- return _("Invalid host name");
- break;
- default:
- abort ();
- /* $@#@#$ compiler. */
- return NULL;
- }
-}
\f
/* The Windows versions of the following two functions are defined in
mswindows.c. */
}
#endif /* not WINDOWS */
\f
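+/* Debugging aid: return a freshly allocated copy of ORIG, run
+   through path_simplify().  */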
+char *
+ps (char *orig)
+{
+ char *r = xstrdup (orig);
+ path_simplify (r);
+ return r;
+}
+
/* Canonicalize PATH, and return a new path. The new path differs from PATH
in that:
   Multiple `/'s are collapsed to a single `/'.
Always use '/' as stub_char.
Don't check for local things using canon_stat.
Change the original string instead of strdup-ing.
- React correctly when beginning with `./' and `../'. */
+ React correctly when beginning with `./' and `../'.
+   Don't strip trailing slashes. */
void
path_simplify (char *path)
{
i = start + 1;
}
- /* Check for trailing `/'. */
- if (start && !path[i])
- {
- zero_last:
- path[--i] = '\0';
- break;
- }
-
/* Check for `../', `./' or trailing `.' by itself. */
if (path[i] == '.')
{
/* Handle trailing `.' by itself. */
if (!path[i + 1])
- goto zero_last;
+ {
+ path[--i] = '\0';
+ break;
+ }
/* Handle `./'. */
if (path[i + 1] == '/')
}
} /* path == '.' */
} /* while */
-
- if (!*path)
- {
- *path = stub_char;
- path[1] = '\0';
- }
}
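
Illustrative results after this change, using the ps() helper above (the `..' handling relies on code outside this hunk):

    ps ("x/../y/z");   /* => "y/z" */
    ps ("a/./b/");     /* => "a/b/" -- the trailing slash is now kept */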
\f
/* "Touch" FILE, i.e. make its atime and mtime equal to the time
}
return 0;
}
+
+/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
+ should be a file name. For example, file_merge("/foo/bar", "baz")
+ will return "/foo/baz". file_merge("/foo/bar/", "baz") will return
+ "foo/bar/baz".
+
+ In other words, it's a simpler and gentler version of uri_merge_1. */
+
+char *
+file_merge (const char *base, const char *file)
+{
+ char *result;
+ const char *cut = (const char *)strrchr (base, '/');
+
+ if (!cut)
+ cut = base + strlen (base);
+
+ result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
+ memcpy (result, base, cut - base);
+ result[cut - base] = '/';
+ strcpy (result + (cut - base) + 1, file);
+
+ return result;
+}
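
For example:

    char *f = file_merge ("/usr/dir/listing.html", "picture.gif");
    /* f = "/usr/dir/picture.gif" */
    xfree (f);

When BASE contains no slash at all, FILE is appended after a '/', so
file_merge ("base", "file") yields "base/file".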
\f
static int in_acclist PARAMS ((const char *const *, const char *, int));
char *time_str PARAMS ((time_t *));
char *datetime_str PARAMS ((time_t *));
-const char *uerrmsg PARAMS ((uerr_t));
-
#ifdef DEBUG_MALLOC
void print_malloc_debug_stats ();
#endif
int file_non_directory_p PARAMS ((const char *));
int make_directory PARAMS ((const char *));
char *unique_name PARAMS ((const char *));
+char *file_merge PARAMS ((const char *, const char *));
int acceptable PARAMS ((const char *));
int accdir PARAMS ((const char *s, enum accd));
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
- FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLHTTPS,
- URLOK, URLHTTP, URLFTP, URLFILE, URLUNKNOWN, URLBADPORT,
- URLBADHOST, FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
+ FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR,
+ FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
FTPINVPASV, FTPNOPASV,
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,