and <sxsvgu824xk.fsf@florida.arsdigita.de>.
+2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
+
+ * ftp.c (getftp): Rewind the stream when retrying from scratch.
+
+ * http.c (gethttp): Rewind the stream when retrying from scratch.
+
+2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
+
+ * retr.c (retrieve_url): Use url_concat() to handle relative
+ redirections instead of /ad hoc/ code.
+
+ * url.c (url_concat): New function encapsulating weird
+ construct().
+ (urllen_http_hack): New function.
+ (construct): When constructing new URLs, recognize that `?' does
+ not form part of the file name in HTTP.
+
2000-10-13 Adrian Aichner <adrian@xemacs.org>
* retr.c: Add msec timing support for WINDOWS.
expected_bytes = ftp_expected_bytes (ftp_last_respline);
} /* cmd & DO_LIST */
+ /* Some FTP servers return the total length of file after REST
+ command, others just return the remaining size. */
+ if (*len && restval && expected_bytes
+ && (expected_bytes == *len - restval))
+ {
+ DEBUGP (("Lying FTP server found, adjusting.\n"));
+ expected_bytes = *len;
+ }
+
/* If no transmission was required, then everything is OK. */
if (!(cmd & (DO_LIST | DO_RETR)))
return RETRFINISHED;
}
}
else
- fp = opt.dfp;
-
- /* Some FTP servers return the total length of file after REST command,
- others just return the remaining size. */
- if (*len && restval && expected_bytes
- && (expected_bytes == *len - restval))
- {
- DEBUGP (("Lying FTP server found, adjusting.\n"));
- expected_bytes = *len;
- }
+ {
+ fp = opt.dfp;
+ if (!restval)
+ {
+ /* This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
if (*len)
{
return FOPENERR;
}
}
- else /* opt.dfp */
- fp = opt.dfp;
+ else /* opt.dfp */
+ {
+ fp = opt.dfp;
+ if (!hs->restval)
+ {
+ /* This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
CMD_DECLARE (cmd_spec_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
-CMD_DECLARE (cmd_spec_outputdocument);
CMD_DECLARE (cmd_spec_recursive);
CMD_DECLARE (cmd_spec_useragent);
{ "noparent", &opt.no_parent, cmd_boolean },
{ "noproxy", &opt.no_proxy, cmd_vector },
{ "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
- { "outputdocument", NULL, cmd_spec_outputdocument },
+ { "outputdocument", &opt.output_document, cmd_string },
{ "pagerequisites", &opt.page_requisites, cmd_boolean },
{ "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
{ "passwd", &opt.ftp_pass, cmd_string },
return 1;
}
-static int
-cmd_spec_outputdocument (const char *com, const char *val, void *closure)
-{
- FREE_MAYBE (opt.output_document);
- opt.output_document = xstrdup (val);
- opt.ntry = 1;
- return 1;
-}
-
static int
cmd_spec_recursive (const char *com, const char *val, void *closure)
{
again:
u = newurl ();
- /* Parse the URL. RFC2068 requires `Location' to contain an
- absoluteURI, but many sites break this requirement. #### We
- should be liberal and accept a relative location, too. */
+ /* Parse the URL. */
result = parseurl (url, u, already_redirected);
if (result != URLOK)
{
location_changed = (result == NEWLOCATION);
if (location_changed)
{
- /* Check for redirection to oneself. */
+ if (mynewloc)
+ {
+ /* The HTTP specs only allow absolute URLs to appear in
+ redirects, but a ton of boneheaded webservers and CGIs
+ out there break the rules and use relative URLs, and
+ popular browsers are lenient about this, so wget should
+ be too. */
+ char *construced_newloc = url_concat (url, mynewloc);
+ free (mynewloc);
+ mynewloc = construced_newloc;
+ }
+ /* Check for redirection back to itself. */
if (url_equal (url, mynewloc))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
mynewloc);
return WRONGCODE;
}
- if (mynewloc)
- {
- /* The HTTP specs only allow absolute URLs to appear in redirects, but
- a ton of boneheaded webservers and CGIs out there break the rules
- and use relative URLs, and popular browsers are lenient about this,
- so wget should be too. */
- if (strstr(mynewloc, "://") == NULL)
- /* Doesn't look like an absolute URL (this check will incorrectly
- think that rare relative URLs containing "://" later in the
- string are absolute). */
- {
- char *temp = malloc(strlen(url) + strlen(mynewloc) + 1);
-
- if (mynewloc[0] == '/')
- /* "Hostless absolute" URL. Convert to absolute. */
- sprintf(temp,"%s%s", url, mynewloc);
- else
- /* Relative URL. Convert to absolute. */
- sprintf(temp,"%s/%s", url, mynewloc);
-
- free(mynewloc);
- mynewloc = temp;
- }
-
- free (url);
- url = mynewloc;
- }
+ free (url);
+ url = mynewloc;
freeurl (u, 1);
already_redirected = 1;
goto again;
return name;
}
+/* Return the length of URL as needed by `construct' below.
+
+   This behaves like strlen(), except that when URL begins with
+   "http://" or "https://" and contains a `?', the offset of the
+   first `?' is returned instead, i.e. the `?' is treated as the end
+   of the string.  Needed for the correct implementation of
+   `construct', at least until we code up proper parsing of URLs.
+
+   URL must be a non-NULL, NUL-terminated string.  */
+static int
+urllen_http_hack (const char *url)
+{
+  /* Compare the complete scheme prefix.  "https://" is 8 characters
+     long; comparing only 7 would also accept a malformed "https:/x".  */
+  if (!strncmp (url, "http://", 7)
+      || !strncmp (url, "https://", 8))
+    {
+      const char *q = strchr (url, '?');
+      if (q)
+        return (int) (q - url);
+    }
+  return (int) strlen (url);
+}
+
/* Construct an absolute URL, given a (possibly) relative one. This
is more tricky than it might seem, but it works. */
static char *
if (*sub != '/')
{
- for (i = strlen (url); i && url[i] != '/'; i--);
+ for (i = urllen_http_hack (url); i && url[i] != '/'; i--);
if (!i || (url[i] == url[i - 1]))
{
- int l = strlen (url);
+ int l = urllen_http_hack (url);
char *t = (char *)alloca (l + 2);
- strcpy (t, url);
+ memcpy (t, url, l);
t[l] = '/';
t[l + 1] = '\0';
url = t;
while (fl);
if (!url[i])
{
- int l = strlen (url);
+ int l = urllen_http_hack (url);
char *t = (char *)alloca (l + 2);
- strcpy (t, url);
+ /* Copy only the L bytes counted above: T holds just L + 2 bytes,
+ while URL itself may be longer when it contains a `?'. */
+ memcpy (t, url, l);
t[l] = '/';
}
return constr;
}
+
+/* Wrapper around construct() with a saner caller interface.
+
+   The caller supplies only BASE_URL and NEW_URL; the length of
+   NEW_URL and the flag telling construct() whether NEW_URL lacks a
+   protocol (as reported by has_proto()) are computed here.  The
+   value returned by construct() is passed back unchanged.  */
+char *
+url_concat (const char *base_url, const char *new_url)
+{
+  size_t new_url_len = strlen (new_url);
+  int lacks_proto = !has_proto (new_url);
+
+  return construct (base_url, new_url, new_url_len, lacks_proto);
+}
\f
/* Optimize URL by host, destructively replacing u->host with realhost
(u->host). Do this regardless of opt.simple_check. */
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int));
void free_urlpos PARAMS ((urlpos *));
+char *url_concat PARAMS ((const char *, const char *));
+
void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *));