#include "utils.h"
#include "url.h"
#include "host.h"
+#include "hash.h"
#ifndef errno
extern int errno;
{
char *leading_string;
int default_port;
+ int enabled;
};
/* Supported schemes: */
static struct scheme_data supported_schemes[] =
{
- { "http://", DEFAULT_HTTP_PORT },
+ { "http://", DEFAULT_HTTP_PORT, 1 }, /* fields: leading_string, default_port, enabled */
#ifdef HAVE_SSL
- { "https://", DEFAULT_HTTPS_PORT },
+ { "https://", DEFAULT_HTTPS_PORT, 1 }, /* entry exists only when HAVE_SSL is defined */
#endif
- { "ftp://", DEFAULT_FTP_PORT },
+ { "ftp://", DEFAULT_FTP_PORT, 1 }, /* enabled = 1, like the other real schemes */
/* SCHEME_INVALID */
- { NULL, -1 }
+ { NULL, -1, 0 } /* sentinel: NULL leading_string, enabled = 0 */
};
static char *construct_relative PARAMS ((const char *, const char *));
{
if (UNSAFE_CHAR (*p1))
{
- const unsigned char c = *p1++;
+ unsigned char c = *p1++;
*p2++ = '%';
*p2++ = XDIGIT_TO_XCHAR (c >> 4);
*p2++ = XDIGIT_TO_XCHAR (c & 0xf);
"foo+bar" -> "foo+bar" (plus is reserved!)
"foo%2b+bar" -> "foo%2b+bar" */
-char *
+static char *
reencode_string (const char *s)
{
const char *p1;
{
case CM_ENCODE:
{
- char c = *p1++;
+ unsigned char c = *p1++;
*p2++ = '%';
*p2++ = XDIGIT_TO_XCHAR (c >> 4);
*p2++ = XDIGIT_TO_XCHAR (c & 0xf);
int i;
for (i = 0; supported_schemes[i].leading_string; i++)
- if (!strncasecmp (url, supported_schemes[i].leading_string,
- strlen (supported_schemes[i].leading_string)))
- return (enum url_scheme)i;
+ if (0 == strncasecmp (url, supported_schemes[i].leading_string,
+ strlen (supported_schemes[i].leading_string)))
+ {
+ if (supported_schemes[i].enabled)
+ return (enum url_scheme) i;
+ else
+ return SCHEME_INVALID;
+ }
+
return SCHEME_INVALID;
}
return supported_schemes[scheme].default_port;
}
+/* Turn off SCHEME by clearing the `enabled' flag of its entry in
+   supported_schemes.  The scheme-matching loop in this file reports
+   SCHEME_INVALID for a URL whose matching entry has the flag clear.
+   SCHEME is used directly as an index into the table.  */
+void
+scheme_disable (enum url_scheme scheme)
+{
+  struct scheme_data *entry = supported_schemes + scheme;
+  entry->enabled = 0;
+}
+
/* Skip the username and password, if present here. The function
should be called *not* with the complete URL, but with the part
right after the scheme.
if (*p == ':')
{
- const char *pp, *path;
+ const char *pp;
char *res;
/* If the characters after the colon and before the next slash
or end of string are all digits, it's HTTP. */
int digits = 0;
for (pp = p + 1; ISDIGIT (*pp); pp++)
++digits;
- if (digits > 0
- && (*pp == '/' || *pp == '\0'))
+ if (digits > 0 && (*pp == '/' || *pp == '\0'))
goto http;
/* Prepend "ftp://" to the entire URL... */
- path = p + 1;
res = xmalloc (6 + strlen (url) + 1);
sprintf (res, "ftp://%s", url);
/* ...and replace ':' with '/'. */
return p;
}
+/* Convert every uppercase character of STR to lowercase, in place.
+   Returns 1 if at least one character was rewritten, 0 if STR was
+   left untouched.  */
+
+static int
+lowercase_str (char *str)
+{
+  int modified = 0;
+  char *cp = str;
+
+  while (*cp)
+    {
+      if (ISUPPER (*cp))
+        {
+          *cp = TOLOWER (*cp);
+          modified = 1;
+        }
+      ++cp;
+    }
+  return modified;
+}
+
static char *parse_errors[] = {
#define PE_NO_ERROR 0
"No error",
-#define PE_UNRECOGNIZED_SCHEME 1
- "Unrecognized scheme",
+#define PE_UNSUPPORTED_SCHEME 1
+ "Unsupported scheme",
#define PE_EMPTY_HOST 2
"Empty host",
#define PE_BAD_PORT_NUMBER 3
{
struct url *u;
const char *p;
+ int path_modified, host_modified;
enum url_scheme scheme;
int port;
char *user = NULL, *passwd = NULL;
- const char *url_orig = url;
-
- p = url = reencode_string (url);
+ char *url_encoded;
scheme = url_scheme (url);
if (scheme == SCHEME_INVALID)
{
- SETERR (error, PE_UNRECOGNIZED_SCHEME);
+ SETERR (error, PE_UNSUPPORTED_SCHEME);
return NULL;
}
+ url_encoded = reencode_string (url);
+ p = url_encoded;
+
p += strlen (supported_schemes[scheme].leading_string);
uname_b = p;
p += url_skip_uname (p);
u = (struct url *)xmalloc (sizeof (struct url));
memset (u, 0, sizeof (*u));
- if (url == url_orig)
- u->url = xstrdup (url);
- else
- u->url = (char *)url;
-
u->scheme = scheme;
u->host = strdupdelim (host_b, host_e);
u->port = port;
u->passwd = passwd;
u->path = strdupdelim (path_b, path_e);
- path_simplify (u->path);
+ path_modified = path_simplify (u->path);
+ parse_path (u->path, &u->dir, &u->file);
+
+ host_modified = lowercase_str (u->host);
if (params_b)
u->params = strdupdelim (params_b, params_e);
if (fragment_b)
u->fragment = strdupdelim (fragment_b, fragment_e);
- parse_path (u->path, &u->dir, &u->file);
+ if (path_modified || u->fragment || host_modified || path_b == path_e)
+ {
+ /* If we suspect that a transformation has rendered what
+ url_string might return different from URL_ENCODED, rebuild
+ u->url using url_string. */
+ u->url = url_string (u, 0);
+
+ if (url_encoded != url)
+ xfree ((char *) url_encoded);
+ }
+ else
+ {
+ if (url_encoded == url)
+ u->url = xstrdup (url);
+ else
+ u->url = url_encoded;
+ }
+ url_encoded = NULL;
return u;
}
#undef FROB
}
-/* Public function for getting the "full path". */
+/* Public function for getting the "full path". E.g. if u->path is
+ "foo/bar" and u->query is "param=value", full_path will be
+ "/foo/bar?param=value". */
+
char *
url_full_path (const struct url *url)
{
}
/* Sync u->path and u->url with u->dir and u->file. */
+
static void
sync_path (struct url *url)
{
FREE_MAYBE (url->fragment);
FREE_MAYBE (url->user);
FREE_MAYBE (url->passwd);
- FREE_MAYBE (url->dir);
- FREE_MAYBE (url->file);
+
+ xfree (url->dir);
+ xfree (url->file);
xfree (url);
}
\f
-urlpos *
+struct urlpos *
get_urls_file (const char *file)
{
struct file_memory *fm;
- urlpos *head, *tail;
+ struct urlpos *head, *tail;
const char *text, *text_end;
/* Load the file. */
--line_end;
if (line_end > line_beg)
{
- urlpos *entry = (urlpos *)xmalloc (sizeof (urlpos));
+ /* URL is in the [line_beg, line_end) region. */
+
+ int up_error_code;
+ char *url_text;
+ struct urlpos *entry;
+ struct url *url;
+
+ /* We must copy the URL to a zero-terminated string, and we
+ can't use alloca because we're in a loop. *sigh*. */
+ url_text = strdupdelim (line_beg, line_end);
+
+ if (opt.base_href)
+ {
+ /* Merge opt.base_href with URL. */
+ char *merged = uri_merge (opt.base_href, url_text);
+ xfree (url_text);
+ url_text = merged;
+ }
+
+ url = url_parse (url_text, &up_error_code);
+ if (!url)
+ {
+ logprintf (LOG_NOTQUIET, "%s: Invalid URL %s: %s\n",
+ file, url_text, url_error (up_error_code));
+ xfree (url_text);
+ continue;
+ }
+ xfree (url_text);
+
+ entry = (struct urlpos *)xmalloc (sizeof (struct urlpos));
memset (entry, 0, sizeof (*entry));
entry->next = NULL;
- entry->url = strdupdelim (line_beg, line_end);
+ entry->url = url;
+
if (!head)
head = entry;
else
\f
/* Free the linked list of urlpos. */
void
-free_urlpos (urlpos *l)
+free_urlpos (struct urlpos *l)
{
while (l)
{
- urlpos *next = l->next;
- xfree (l->url);
+ struct urlpos *next = l->next;
+ if (l->url)
+ url_free (l->url);
FREE_MAYBE (l->local_name);
xfree (l);
l = next;
static char *
mkstruct (const struct url *u)
{
- char *host, *dir, *file, *res, *dirpref;
+ char *dir, *dir_preencoding;
+ char *file, *res, *dirpref;
+ char *query = u->query && *u->query ? u->query : NULL;
int l;
if (opt.cut_dirs)
else
dir = u->dir + (*u->dir == '/');
- host = xstrdup (u->host);
/* Check for the true name (or at least a consistent name for saving
to directory) of HOST, reusing the hlist if possible. */
- if (opt.add_hostdir && !opt.simple_check)
- {
- char *nhost = realhost (host);
- xfree (host);
- host = nhost;
- }
- /* Add dir_prefix and hostname (if required) to the beginning of
- dir. */
if (opt.add_hostdir)
{
+ /* Add dir_prefix and hostname (if required) to the beginning of
+ dir. */
+ dirpref = (char *)alloca (strlen (opt.dir_prefix) + 1
+ + strlen (u->host)
+ + 1 + numdigit (u->port)
+ + 1);
if (!DOTP (opt.dir_prefix))
+ sprintf (dirpref, "%s/%s", opt.dir_prefix, u->host);
+ else
+ strcpy (dirpref, u->host);
+
+ if (u->port != scheme_default_port (u->scheme))
{
- dirpref = (char *)alloca (strlen (opt.dir_prefix) + 1
- + strlen (host) + 1);
- sprintf (dirpref, "%s/%s", opt.dir_prefix, host);
+ int len = strlen (dirpref);
+ dirpref[len] = ':';
+ long_to_string (dirpref + len + 1, u->port);
}
- else
- STRDUP_ALLOCA (dirpref, host);
}
- else /* not add_hostdir */
+ else /* not add_hostdir */
{
if (!DOTP (opt.dir_prefix))
dirpref = opt.dir_prefix;
else
dirpref = "";
}
- xfree (host);
/* If there is a prefix, prepend it. */
if (*dirpref)
sprintf (newdir, "%s%s%s", dirpref, *dir == '/' ? "" : "/", dir);
dir = newdir;
}
- dir = encode_string (dir);
+
+ dir_preencoding = dir;
+ dir = reencode_string (dir_preencoding);
+
l = strlen (dir);
if (l && dir[l - 1] == '/')
dir[l - 1] = '\0';
file = u->file;
/* Finally, construct the full name. */
- res = (char *)xmalloc (strlen (dir) + 1 + strlen (file) + 1);
+ res = (char *)xmalloc (strlen (dir) + 1 + strlen (file)
+ + (query ? (1 + strlen (query)) : 0)
+ + 1);
sprintf (res, "%s%s%s", dir, *dir ? "/" : "", file);
- xfree (dir);
+ if (query)
+ {
+ strcat (res, "?");
+ strcat (res, query);
+ }
+ if (dir != dir_preencoding)
+ xfree (dir);
return res;
}
{
if (UNSAFE_CHAR (*from))
{
- const unsigned char c = *from++;
+ unsigned char c = *from++;
*to++ = '%';
*to++ = XDIGIT_TO_XCHAR (c >> 4);
*to++ = XDIGIT_TO_XCHAR (c & 0xf);
static int
urlpath_length (const char *url)
{
- const char *q = strchr (url, '?');
- if (q)
- return q - url;
- return strlen (url);
+  /* The path part of URL stops at the first '?', ';' or '#'.
+     strpbrk_or_eos is not visible here; presumably it yields the
+     terminating NUL when none of those occurs -- confirm.  */
+  const char *path_end = strpbrk_or_eos (url, "?;#");
+  return path_end - url;
}
/* Find the last occurrence of character C in the range [b, e), or
The parameters LINKLENGTH is useful if LINK is not zero-terminated.
See uri_merge for a gentler interface to this functionality.
- #### This function should handle `./' and `../' so that the evil
+ Perhaps this function should handle `./' and `../' so that the evil
path_simplify can go. */
static char *
uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
{
const char *end = base + urlpath_length (base);
- if (*link != '/')
+ if (!*link)
{
- /* LINK is a relative URL: we need to replace everything
- after last slash (possibly empty) with LINK.
-
- So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy",
- our result should be "whatever/foo/qux/xyzzy". */
- int need_explicit_slash = 0;
- int span;
- const char *start_insert;
- const char *last_slash = find_last_char (base, end, '/');
- if (!last_slash)
- {
- /* No slash found at all. Append LINK to what we have,
- but we'll need a slash as a separator.
-
- Example: if base == "foo" and link == "qux/xyzzy", then
- we cannot just append link to base, because we'd get
- "fooqux/xyzzy", whereas what we want is
- "foo/qux/xyzzy".
-
- To make sure the / gets inserted, we set
- need_explicit_slash to 1. We also set start_insert
- to end + 1, so that the length calculations work out
- correctly for one more (slash) character. Accessing
- that character is fine, since it will be the
- delimiter, '\0' or '?'. */
- /* example: "foo?..." */
- /* ^ ('?' gets changed to '/') */
- start_insert = end + 1;
- need_explicit_slash = 1;
- }
- else if (last_slash && last_slash != base && *(last_slash - 1) == '/')
- {
- /* example: http://host" */
- /* ^ */
- start_insert = end + 1;
- need_explicit_slash = 1;
- }
- else
- {
- /* example: "whatever/foo/bar" */
- /* ^ */
- start_insert = last_slash + 1;
- }
-
- span = start_insert - base;
- constr = (char *)xmalloc (span + linklength + 1);
- if (span)
- memcpy (constr, base, span);
- if (need_explicit_slash)
- constr[span - 1] = '/';
- if (linklength)
- memcpy (constr + span, link, linklength);
- constr[span + linklength] = '\0';
+ /* Empty LINK points back to BASE, query string and all. */
+ constr = xstrdup (base);
}
- else /* *link == `/' */
+ else if (*link == '?')
+ {
+ /* LINK points to the same location, but changes the query
+ string. Examples: */
+ /* uri_merge("path", "?new") -> "path?new" */
+ /* uri_merge("path?foo", "?new") -> "path?new" */
+ /* uri_merge("path?foo#bar", "?new") -> "path?new" */
+ /* uri_merge("path#foo", "?new") -> "path?new" */
+ int baselength = end - base;
+ constr = xmalloc (baselength + linklength + 1);
+ memcpy (constr, base, baselength);
+ memcpy (constr + baselength, link, linklength);
+ constr[baselength + linklength] = '\0';
+ }
+ else if (*link == '#')
+ {
+ /* uri_merge("path", "#new") -> "path#new" */
+ /* uri_merge("path#foo", "#new") -> "path#new" */
+ /* uri_merge("path?foo", "#new") -> "path?foo#new" */
+ /* uri_merge("path?foo#bar", "#new") -> "path?foo#new" */
+ int baselength;
+ const char *end1 = strchr (base, '#');
+ if (!end1)
+ end1 = base + strlen (base);
+ baselength = end1 - base;
+ constr = xmalloc (baselength + linklength + 1);
+ memcpy (constr, base, baselength);
+ memcpy (constr + baselength, link, linklength);
+ constr[baselength + linklength] = '\0';
+ }
+ else if (*link == '/')
{
/* LINK is an absolute path: we need to replace everything
after (and including) the FIRST slash with LINK.
memcpy (constr + span, link, linklength);
constr[span + linklength] = '\0';
}
+ else
+ {
+ /* LINK is a relative URL: we need to replace everything
+ after last slash (possibly empty) with LINK.
+
+ So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy",
+ our result should be "whatever/foo/qux/xyzzy". */
+ int need_explicit_slash = 0;
+ int span;
+ const char *start_insert;
+ const char *last_slash = find_last_char (base, end, '/');
+ if (!last_slash)
+ {
+ /* No slash found at all. Append LINK to what we have,
+ but we'll need a slash as a separator.
+
+ Example: if base == "foo" and link == "qux/xyzzy", then
+ we cannot just append link to base, because we'd get
+ "fooqux/xyzzy", whereas what we want is
+ "foo/qux/xyzzy".
+
+ To make sure the / gets inserted, we set
+ need_explicit_slash to 1. We also set start_insert
+ to end + 1, so that the length calculations work out
+ correctly for one more (slash) character. Accessing
+ that character is fine, since it will be the
+ delimiter, '\0' or '?'. */
+ /* example: "foo?..." */
+ /* ^ ('?' gets changed to '/') */
+ start_insert = end + 1;
+ need_explicit_slash = 1;
+ }
+ else if (last_slash && last_slash != base && *(last_slash - 1) == '/')
+ {
+ /* example: http://host" */
+ /* ^ */
+ start_insert = end + 1;
+ need_explicit_slash = 1;
+ }
+ else
+ {
+ /* example: "whatever/foo/bar" */
+ /* ^ */
+ start_insert = last_slash + 1;
+ }
+
+ span = start_insert - base;
+ constr = (char *)xmalloc (span + linklength + 1);
+ if (span)
+ memcpy (constr, base, span);
+ if (need_explicit_slash)
+ constr[span - 1] = '/';
+ if (linklength)
+ memcpy (constr + span, link, linklength);
+ constr[span + linklength] = '\0';
+ }
}
else /* !no_scheme */
{
}
\f
static void write_backup_file PARAMS ((const char *, downloaded_file_t));
-static void replace_attr PARAMS ((const char **, int, FILE *, const char *));
-
-/* Change the links in an HTML document. Accepts a structure that
- defines the positions of all the links. */
+static const char *replace_attr PARAMS ((const char *, int, FILE *,
+ const char *));
+static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
+ const char *, int));
+static char *local_quote_string PARAMS ((const char *));
+
+/* Change the links in one HTML file. LINKS is a list of links in the
+ document, along with their positions and the desired direction of
+ the conversion. */
void
-convert_links (const char *file, urlpos *l)
+convert_links (const char *file, struct urlpos *links)
{
struct file_memory *fm;
- FILE *fp;
- const char *p;
- downloaded_file_t downloaded_file_return;
+ FILE *fp;
+ const char *p;
+ downloaded_file_t downloaded_file_return;
+
+ struct urlpos *link;
+ int to_url_count = 0, to_file_count = 0;
logprintf (LOG_VERBOSE, _("Converting %s... "), file);
/* First we do a "dry run": go through the list L and see whether
any URL needs to be converted in the first place. If not, just
leave the file alone. */
- int count = 0;
- urlpos *dry = l;
- for (dry = l; dry; dry = dry->next)
+ int dry_count = 0;
+ struct urlpos *dry = links;
+ for (dry = links; dry; dry = dry->next)
if (dry->convert != CO_NOCONVERT)
- ++count;
- if (!count)
+ ++dry_count;
+ if (!dry_count)
{
logputs (LOG_VERBOSE, _("nothing to do.\n"));
return;
read_file_free (fm);
return;
}
+
/* Here we loop through all the URLs in file, replacing those of
them that are downloaded with relative references. */
p = fm->content;
- for (; l; l = l->next)
+ for (link = links; link; link = link->next)
{
- char *url_start = fm->content + l->pos;
+ char *url_start = fm->content + link->pos;
- if (l->pos >= fm->length)
+ if (link->pos >= fm->length)
{
DEBUGP (("Something strange is going on. Please investigate."));
break;
}
/* If the URL is not to be converted, skip it. */
- if (l->convert == CO_NOCONVERT)
+ if (link->convert == CO_NOCONVERT)
{
- DEBUGP (("Skipping %s at position %d.\n", l->url, l->pos));
+ DEBUGP (("Skipping %s at position %d.\n", link->url->url, link->pos));
continue;
}
quote, to the outfile. */
fwrite (p, 1, url_start - p, fp);
p = url_start;
- if (l->convert == CO_CONVERT_TO_RELATIVE)
+
+ switch (link->convert)
{
+ case CO_CONVERT_TO_RELATIVE:
/* Convert absolute URL to relative. */
- char *newname = construct_relative (file, l->local_name);
- char *quoted_newname = html_quote_string (newname);
- replace_attr (&p, l->size, fp, quoted_newname);
- DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
- l->url, newname, l->pos, file));
- xfree (newname);
- xfree (quoted_newname);
- }
- else if (l->convert == CO_CONVERT_TO_COMPLETE)
- {
+ {
+ char *newname = construct_relative (file, link->local_name);
+ char *quoted_newname = local_quote_string (newname);
+
+ if (!link->link_refresh_p)
+ p = replace_attr (p, link->size, fp, quoted_newname);
+ else
+ p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
+ link->refresh_timeout);
+
+ DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
+ link->url->url, newname, link->pos, file));
+ xfree (newname);
+ xfree (quoted_newname);
+ ++to_file_count;
+ break;
+ }
+ case CO_CONVERT_TO_COMPLETE:
/* Convert the link to absolute URL. */
- char *newlink = l->url;
- char *quoted_newlink = html_quote_string (newlink);
- replace_attr (&p, l->size, fp, quoted_newlink);
- DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
- newlink, l->pos, file));
- xfree (quoted_newlink);
+ {
+ char *newlink = link->url->url;
+ char *quoted_newlink = html_quote_string (newlink);
+
+ if (!link->link_refresh_p)
+ p = replace_attr (p, link->size, fp, quoted_newlink);
+ else
+ p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
+ link->refresh_timeout);
+
+ DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
+ newlink, link->pos, file));
+ xfree (quoted_newlink);
+ ++to_url_count;
+ break;
+ }
+ case CO_NULLIFY_BASE:
+ /* Change the base href to "". */
+ p = replace_attr (p, link->size, fp, "");
+ break;
+ case CO_NOCONVERT:
+ abort ();
+ break;
}
}
+
/* Output the rest of the file. */
if (p - fm->content < fm->length)
fwrite (p, 1, fm->length - (p - fm->content), fp);
fclose (fp);
read_file_free (fm);
- logputs (LOG_VERBOSE, _("done.\n"));
+
+ logprintf (LOG_VERBOSE, "%d-%d\n", to_file_count, to_url_count);
}
/* Construct and return a malloced copy of the relative link from two
return res;
}
\f
-/* Add URL to the head of the list L. */
-urlpos *
-add_url (urlpos *l, const char *url, const char *file)
-{
- urlpos *t;
-
- t = (urlpos *)xmalloc (sizeof (urlpos));
- memset (t, 0, sizeof (*t));
- t->url = xstrdup (url);
- t->local_name = xstrdup (file);
- t->next = l;
- return t;
-}
-
static void
write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
{
-- Dan Harkless <wget@harkless.org>
This [adding a field to the urlpos structure] didn't work
- because convert_file() is called twice: once after all its
- sublinks have been retrieved in recursive_retrieve(), and
- once at the end of the day in convert_all_links(). The
- original linked list collected in recursive_retrieve() is
- lost after the first invocation of convert_links(), and
- convert_all_links() makes a new one (it calls get_urls_html()
- for each file it covers.) That's why your first approach didn't
- work. The way to make it work is perhaps to make this flag a
- field in the `urls_html' list.
+ because convert_file() is called from convert_all_links at
+ the end of the retrieval with a freshly built new urlpos
+ list.
-- Hrvoje Niksic <hniksic@arsdigita.com>
*/
converted_file_ptr = xmalloc(sizeof(*converted_file_ptr));
static int find_fragment PARAMS ((const char *, int, const char **,
const char **));
-static void
-replace_attr (const char **pp, int raw_size, FILE *fp, const char *new_str)
+/* Replace an attribute's original text with NEW_TEXT. */
+
+static const char *
+replace_attr (const char *p, int size, FILE *fp, const char *new_text)
{
- const char *p = *pp;
int quote_flag = 0;
- int size = raw_size;
- char quote_char = '\"';
+ char quote_char = '\"'; /* use "..." for quoting, unless the
+ original value is quoted, in which
+ case reuse its quoting char. */
const char *frag_beg, *frag_end;
/* Structure of our string is:
"...old-contents..."
- <--- l->size ---> (with quotes)
+ <--- size ---> (with quotes)
OR:
...old-contents...
- <--- l->size --> (no quotes) */
+ <--- size --> (no quotes) */
if (*p == '\"' || *p == '\'')
{
size -= 2; /* disregard opening and closing quote */
}
putc (quote_char, fp);
- fputs (new_str, fp);
+ fputs (new_text, fp);
/* Look for fragment identifier, if any. */
if (find_fragment (p, size, &frag_beg, &frag_end))
if (quote_flag)
++p;
putc (quote_char, fp);
- *pp = p;
+
+ return p;
+}
+
+/* Variant of replace_attr for the CONTENT attribute of
+   <meta http-equiv=refresh content="...">: the text written is
+   "TIMEOUT; URL=" followed by NEW_TEXT rather than NEW_TEXT alone.
+   P, SIZE and FP are handed to replace_attr unchanged, and its
+   return value is returned.  */
+
+static const char *
+replace_attr_refresh_hack (const char *p, int size, FILE *fp,
+                           const char *new_text, int timeout)
+{
+  /* Room for "<timeout>; URL=<new_text>" plus the terminating NUL.
+     sprintf's %d writes a '-' in front of a negative TIMEOUT, and
+     numdigit is not visible here, so one extra byte is reserved in
+     case it counts digits only -- TODO confirm.  */
+  char *new_with_timeout = (char *)alloca (1 /* possible '-' */
+                                           + numdigit (timeout)
+                                           + 6 /* "; URL=" */
+                                           + strlen (new_text)
+                                           + 1);
+  sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text);
+
+  return replace_attr (p, size, fp, new_with_timeout);
+}
/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
return 0;
}
-typedef struct _downloaded_file_list {
- char* file;
- downloaded_file_t download_type;
- struct _downloaded_file_list* next;
-} downloaded_file_list;
+/* Produce a freshly quoted copy of FILE suitable as a local reference
+   inside a converted HTML document.
+
+   When `--html-extension' (opt.html_extension) is on, every '?' in
+   FILE is first rewritten as "%3F", so that a browser fetching the
+   converted page over HTTP does not treat the tail of the file name
+   as a query string.  This is only done with that option because
+   turning "index.html?foo=bar" into "index.html%3Ffoo=bar" would
+   leave a name that no longer looks like an HTML file, whereas
+   "index.html?foo=bar.html" -> "index.html%3Ffoo=bar.html" is fine
+   for both local and HTTP-served browsing.
+
+   In every case the result is whatever html_quote_string returns for
+   the (possibly rewritten) name.  */
+
+static char *
+local_quote_string (const char *file)
+{
+  const char *src;
+  char *escaped, *dst;
+  int qmarks, escaped_len;
+
+  if (!opt.html_extension)
+    return html_quote_string (file);
+
+  qmarks = count_char (file, '?');
+  if (!qmarks)
+    return html_quote_string (file);
+
+  /* Each '?' grows from one character to three ("%3F"), i.e. two
+     extra characters per question mark.  */
+  escaped_len = strlen (file) + qmarks * 2;
+  escaped = (char *)alloca (escaped_len + 1);
+
+  dst = escaped;
+  for (src = file; *src; src++)
+    {
+      if (*src == '?')
+        {
+          memcpy (dst, "%3F", 3);
+          dst += 3;
+        }
+      else
+        *dst++ = *src;
+    }
+  assert (dst - escaped == escaped_len);
+  *dst = '\0';
+
+  return html_quote_string (escaped);
+}
+
+/* The hash table below stores pointers, but what we want to remember
+   per file is a downloaded_file_t "mode".  Translate MODE into the
+   address of a function-local static variable holding that same
+   mode, so the pointer stays valid after this function returns.
+   A MODE that is not one of the four handled values yields NULL.  */
+
+static downloaded_file_t *
+downloaded_mode_to_ptr (downloaded_file_t mode)
+{
+  static downloaded_file_t not_downloaded = FILE_NOT_ALREADY_DOWNLOADED;
+  static downloaded_file_t normally = FILE_DOWNLOADED_NORMALLY;
+  static downloaded_file_t html_ext_added
+    = FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED;
+  static downloaded_file_t check_only = CHECK_FOR_FILE;
+  downloaded_file_t *slot = NULL;
-static downloaded_file_list *downloaded_files;
+
+  switch (mode)
+    {
+    case FILE_NOT_ALREADY_DOWNLOADED:
+      slot = &not_downloaded;
+      break;
+    case FILE_DOWNLOADED_NORMALLY:
+      slot = &normally;
+      break;
+    case FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED:
+      slot = &html_ext_added;
+      break;
+    case CHECK_FOR_FILE:
+      slot = &check_only;
+      break;
+    default:
+      break;
+    }
+  return slot;
+}
+
+/* This should really be merged with dl_file_url_map and
+ downloaded_html_files in recur.c. This was originally a list, but
+ I changed it to a hash table because it was actually taking a lot of
+ time to find things in it. */
+
+static struct hash_table *downloaded_files_hash;
/* Remembers which files have been downloaded. In the standard case, should be
called with mode == FILE_DOWNLOADED_NORMALLY for each file we actually
it, call with mode == CHECK_FOR_FILE. Please be sure to call this function
with local filenames, not remote URLs. */
downloaded_file_t
-downloaded_file (downloaded_file_t mode, const char* file)
+downloaded_file (downloaded_file_t mode, const char *file)
{
- boolean found_file = FALSE;
- downloaded_file_list* rover = downloaded_files;
-
- while (rover != NULL)
- if (strcmp(rover->file, file) == 0)
- {
- found_file = TRUE;
- break;
- }
- else
- rover = rover->next;
+  /* Returns the mode previously recorded for FILE, or
+     FILE_NOT_ALREADY_DOWNLOADED if FILE has no entry yet.  Unless
+     MODE is CHECK_FOR_FILE, an unknown FILE is recorded with MODE
+     under a private copy of its name.  */
+  downloaded_file_t *ptr;
- if (found_file)
- return rover->download_type; /* file had already been downloaded */
- else
+  /* Pure lookup: never creates the table and never adds an entry.  */
+  if (mode == CHECK_FOR_FILE)
{
- if (mode != CHECK_FOR_FILE)
- {
- rover = xmalloc(sizeof(*rover));
- rover->file = xstrdup(file); /* use xstrdup() so die on out-of-mem. */
- rover->download_type = mode;
- rover->next = downloaded_files;
- downloaded_files = rover;
- }
-
- return FILE_NOT_ALREADY_DOWNLOADED;
+      if (!downloaded_files_hash)
+        return FILE_NOT_ALREADY_DOWNLOADED;
+      ptr = hash_table_get (downloaded_files_hash, file);
+      if (!ptr)
+        return FILE_NOT_ALREADY_DOWNLOADED;
+      return *ptr;
}
+
+  /* Recording call: create the table on first use.  */
+  if (!downloaded_files_hash)
+    downloaded_files_hash = make_string_hash_table (0);
+
+  /* An existing entry wins; its recorded mode is reported and left
+     unchanged.  */
+  ptr = hash_table_get (downloaded_files_hash, file);
+  if (ptr)
+    return *ptr;
+
+  /* Store PTR itself, which points at static storage inside
+     downloaded_mode_to_ptr.  Storing &ptr would put the address of
+     this local variable into the table; it dangles as soon as we
+     return, and later lookups would read it as a downloaded_file_t.  */
+  ptr = downloaded_mode_to_ptr (mode);
+  hash_table_put (downloaded_files_hash, xstrdup (file), ptr);
+
+  return FILE_NOT_ALREADY_DOWNLOADED;
+}
+
+/* Callback handed to hash_table_map by downloaded_files_free: release
+   the KEY string of one table entry.  VALUE and IGNORED are not
+   touched.  Always returns 0; presumably that tells hash_table_map
+   to keep going -- confirm against the hash table interface.  */
+static int
+df_free_mapper (void *key, void *value, void *ignored)
+{
+  (void) value;
+  (void) ignored;
+
+  xfree (key);
+  return 0;
}
void
downloaded_files_free (void)
{
- downloaded_file_list* rover = downloaded_files;
- while (rover)
- {
- downloaded_file_list *next = rover->next;
- xfree (rover->file);
- xfree (rover);
- rover = next;
- }
+  /* Release everything remembered by downloaded_file.  If the table
+     was never created there is nothing to do.  */
+  if (!downloaded_files_hash)
+    return;
+
+  /* Free each key string via df_free_mapper, then the table itself,
+     and reset the pointer so a later call starts from scratch.  */
+  hash_table_map (downloaded_files_hash, df_free_mapper, NULL);
+  hash_table_destroy (downloaded_files_hash);
+  downloaded_files_hash = NULL;
}