From 0dd418242a66f82def061205fc6366ae63223723 Mon Sep 17 00:00:00 2001 From: hniksic Date: Tue, 31 Oct 2000 11:25:32 -0800 Subject: [PATCH] [svn] Committed my patches from and . --- src/ChangeLog | 17 +++++++++++++++++ src/ftp.c | 29 +++++++++++++++++++---------- src/http.c | 13 +++++++++++-- src/init.c | 12 +----------- src/retr.c | 46 +++++++++++++++------------------------------- src/url.c | 32 ++++++++++++++++++++++++++++---- src/url.h | 2 ++ 7 files changed, 93 insertions(+), 58 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 490dbd09..6392cc41 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,20 @@ +2000-10-31 Hrvoje Niksic + + * ftp.c (getftp): Ditto. + + * http.c (gethttp): Rewind the stream when retrying from scratch. + +2000-10-31 Hrvoje Niksic + + * retr.c (retrieve_url): Use url_concat() to handle relative + redirections instead of /ad hoc/ code. + + * url.c (url_concat): New function encapsulating weird + construct(). + (urllen_http_hack): New function. + (construct): When constructing new URLs, recognize that `?' does + not form part of the file name in HTTP. + 2000-10-13 Adrian Aichner * retr.c: Add msec timing support for WINDOWS. diff --git a/src/ftp.c b/src/ftp.c index ab70114b..7496a5ae 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -648,6 +648,15 @@ Error in server response, closing control connection.\n")); expected_bytes = ftp_expected_bytes (ftp_last_respline); } /* cmd & DO_LIST */ + /* Some FTP servers return the total length of file after REST + command, others just return the remaining size. */ + if (*len && restval && expected_bytes + && (expected_bytes == *len - restval)) + { + DEBUGP (("Lying FTP server found, adjusting.\n")); + expected_bytes = *len; + } + /* If no transmission was required, then everything is OK. 
*/ if (!(cmd & (DO_LIST | DO_RETR))) return RETRFINISHED; @@ -685,16 +694,16 @@ Error in server response, closing control connection.\n")); } } else - fp = opt.dfp; - - /* Some FTP servers return the total length of file after REST command, - others just return the remaining size. */ - if (*len && restval && expected_bytes - && (expected_bytes == *len - restval)) - { - DEBUGP (("Lying FTP server found, adjusting.\n")); - expected_bytes = *len; - } + { + fp = opt.dfp; + if (!restval) + { + /* This will silently fail for streams that don't correspond + to regular files, but that's OK. */ + rewind (fp); + clearerr (fp); + } + } if (*len) { diff --git a/src/http.c b/src/http.c index 59cfc044..24de9bb8 100644 --- a/src/http.c +++ b/src/http.c @@ -843,8 +843,17 @@ Accept: %s\r\n\ return FOPENERR; } } - else /* opt.dfp */ - fp = opt.dfp; + else /* opt.dfp */ + { + fp = opt.dfp; + if (!hs->restval) + { + /* This will silently fail for streams that don't correspond + to regular files, but that's OK. */ + rewind (fp); + clearerr (fp); + } + } /* #### This confuses the code that checks for file size. There should be some overhead information. 
*/ diff --git a/src/init.c b/src/init.c index 9e119445..5b10cdca 100644 --- a/src/init.c +++ b/src/init.c @@ -76,7 +76,6 @@ CMD_DECLARE (cmd_spec_dotstyle); CMD_DECLARE (cmd_spec_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); -CMD_DECLARE (cmd_spec_outputdocument); CMD_DECLARE (cmd_spec_recursive); CMD_DECLARE (cmd_spec_useragent); @@ -139,7 +138,7 @@ static struct { { "noparent", &opt.no_parent, cmd_boolean }, { "noproxy", &opt.no_proxy, cmd_vector }, { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ - { "outputdocument", NULL, cmd_spec_outputdocument }, + { "outputdocument", &opt.output_document, cmd_string }, { "pagerequisites", &opt.page_requisites, cmd_boolean }, { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean }, { "passwd", &opt.ftp_pass, cmd_string }, @@ -915,15 +914,6 @@ cmd_spec_mirror (const char *com, const char *val, void *closure) return 1; } -static int -cmd_spec_outputdocument (const char *com, const char *val, void *closure) -{ - FREE_MAYBE (opt.output_document); - opt.output_document = xstrdup (val); - opt.ntry = 1; - return 1; -} - static int cmd_spec_recursive (const char *com, const char *val, void *closure) { diff --git a/src/retr.c b/src/retr.c index 92e787fb..eceacfe8 100644 --- a/src/retr.c +++ b/src/retr.c @@ -337,9 +337,7 @@ retrieve_url (const char *origurl, char **file, char **newloc, again: u = newurl (); - /* Parse the URL. RFC2068 requires `Location' to contain an - absoluteURI, but many sites break this requirement. #### We - should be liberal and accept a relative location, too. */ + /* Parse the URL. */ result = parseurl (url, u, already_redirected); if (result != URLOK) { @@ -426,40 +424,26 @@ retrieve_url (const char *origurl, char **file, char **newloc, location_changed = (result == NEWLOCATION); if (location_changed) { - /* Check for redirection to oneself. 
*/ + if (mynewloc) + { + /* The HTTP specs only allow absolute URLs to appear in + redirects, but a ton of boneheaded webservers and CGIs + out there break the rules and use relative URLs, and + popular browsers are lenient about this, so wget should + be too. */ + char *construced_newloc = url_concat (url, mynewloc); + free (mynewloc); + mynewloc = construced_newloc; + } + /* Check for redirection to back to itself. */ if (url_equal (url, mynewloc)) { logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"), mynewloc); return WRONGCODE; } - if (mynewloc) - { - /* The HTTP specs only allow absolute URLs to appear in redirects, but - a ton of boneheaded webservers and CGIs out there break the rules - and use relative URLs, and popular browsers are lenient about this, - so wget should be too. */ - if (strstr(mynewloc, "://") == NULL) - /* Doesn't look like an absolute URL (this check will incorrectly - think that rare relative URLs containing "://" later in the - string are absolute). */ - { - char *temp = malloc(strlen(url) + strlen(mynewloc) + 1); - - if (mynewloc[0] == '/') - /* "Hostless absolute" URL. Convert to absolute. */ - sprintf(temp,"%s%s", url, mynewloc); - else - /* Relative URL. Convert to absolute. */ - sprintf(temp,"%s/%s", url, mynewloc); - - free(mynewloc); - mynewloc = temp; - } - - free (url); - url = mynewloc; - } + free (url); + url = mynewloc; freeurl (u, 1); already_redirected = 1; goto again; diff --git a/src/url.c b/src/url.c index b6220e3d..a0747a56 100644 --- a/src/url.c +++ b/src/url.c @@ -1266,6 +1266,23 @@ url_filename (const struct urlinfo *u) return name; } +/* Like strlen(), except if `?' is present in the URL and its protocol + is HTTP, act as if `?' is the end of the string. Needed for the + correct implementation of `construct' below, at least until we code + up proper parsing of URLs. 
*/
+static int
+urllen_http_hack (const char *url)
+{
+  if ((!strncmp (url, "http://", 7)	/* 7 == strlen ("http://") */
+       || !strncmp (url, "https://", 8)))	/* 8 == strlen ("https://"); 7 would also accept "https:/" */
+    {
+      const char *q = strchr (url, '?');
+      if (q)
+        return q - url;
+    }
+  return strlen (url);
+}
+
 /* Construct an absolute URL, given a (possibly) relative one. This is
    more tricky than it might seem, but it works. */
 static char *
@@ -1279,12 +1296,12 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
 
       if (*sub != '/')
         {
-          for (i = strlen (url); i && url[i] != '/'; i--);
+          for (i = urllen_http_hack (url); i && url[i] != '/'; i--);
           if (!i || (url[i] == url[i - 1]))
             {
-              int l = strlen (url);
+              int l = urllen_http_hack (url);
               char *t = (char *)alloca (l + 2);
-              strcpy (t, url);
+              memcpy (t, url, l);
               t[l] = '/';
               t[l + 1] = '\0';
               url = t;
@@ -1312,7 +1329,7 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
               while (fl);
               if (!url[i])
                 {
-                  int l = strlen (url);
+                  int l = urllen_http_hack (url);
                   char *t = (char *)alloca (l + 2);
                   strcpy (t, url);
                   t[l] = '/';
@@ -1334,6 +1351,13 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
     }
   return constr;
 }
+
+/* Like the function above, but with a saner caller interface. */
+char *
+url_concat (const char *base_url, const char *new_url)
+{
+  return construct (base_url, new_url, strlen (new_url), !has_proto (new_url));
+}
 
 /* Optimize URL by host, destructively replacing u->host with realhost
    (u->host). Do this regardless of opt.simple_check. */
diff --git a/src/url.h b/src/url.h
index 02ea9ca5..0f55ec35 100644
--- a/src/url.h
+++ b/src/url.h
@@ -98,6 +98,8 @@ urlpos *get_urls_file PARAMS ((const char *));
 urlpos *get_urls_html PARAMS ((const char *, const char *, int, int));
 void free_urlpos PARAMS ((urlpos *));
 
+char *url_concat PARAMS ((const char *, const char *));
+
 void rotate_backups PARAMS ((const char *));
 int mkalldirs PARAMS ((const char *));
 char *url_filename PARAMS ((const struct urlinfo *));
-- 
2.39.2