X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=02645724b136bc0f5554b9e98489e38aacfbdc68;hb=c1b7382ec4c25c23c81a0e0964d94fff72c6a633;hp=853132dc396d9fa52458dc30c49e5d02ef975628;hpb=1153d3d05c2129f50b5e4a81d1f021986a2a19d0;p=wget diff --git a/src/http.c b/src/http.c index 853132dc..02645724 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,6 @@ /* HTTP support. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -17,17 +17,18 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wget. If not, see . -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. */ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +#include "wget.h" #include #include @@ -40,7 +41,6 @@ so, delete this exception statement from your version. */ #include #include -#include "wget.h" #include "hash.h" #include "http.h" #include "utils.h" @@ -279,7 +279,7 @@ request_set_user_header (struct request *req, const char *header) return; BOUNDED_TO_ALLOCA (header, p, name); ++p; - while (ISSPACE (*p)) + while (c_isspace (*p)) ++p; request_set_header (req, xstrdup (name), (char *) p, rel_name); } @@ -653,9 +653,9 @@ resp_header_locate (const struct response *resp, const char *name, int start, && 0 == strncasecmp (b, name, name_len)) { b += name_len + 1; - while (b < e && ISSPACE (*b)) + while (b < e && c_isspace (*b)) ++b; - while (b < e && ISSPACE (e[-1])) + while (b < e && c_isspace (e[-1])) --e; *begptr = b; *endptr = e; @@ -754,17 +754,17 @@ resp_status (const struct response *resp, char **message) if (p < end && *p == '/') { ++p; - while (p < end && ISDIGIT (*p)) + while (p < end && c_isdigit (*p)) ++p; if (p < end && *p == '.') ++p; - while (p < end && ISDIGIT (*p)) + while (p < end && c_isdigit (*p)) ++p; } - while (p < end && ISSPACE (*p)) + while (p < end && c_isspace (*p)) ++p; - if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2])) + if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2])) return -1; status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0'); @@ -772,9 +772,9 @@ resp_status (const struct response *resp, char **message) if (message) { - while (p < end && ISSPACE (*p)) + while (p < end && c_isspace (*p)) ++p; - while (p < end && ISSPACE (end[-1])) + while (p < end && c_isspace (end[-1])) --end; *message = strdupdelim (p, end); } @@ -845,27 +845,30 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr, HTTP spec. */ if (*hdr == ':') ++hdr; - while (ISSPACE (*hdr)) + while (c_isspace (*hdr)) ++hdr; if (!*hdr) return false; } - if (!ISDIGIT (*hdr)) + if (!c_isdigit (*hdr)) return false; - for (num = 0; ISDIGIT (*hdr); hdr++) + for (num = 0; c_isdigit (*hdr); hdr++) num = 10 * num + (*hdr - '0'); - if (*hdr != '-' || !ISDIGIT (*(hdr + 1))) + if (*hdr != '-' || !c_isdigit (*(hdr + 1))) return false; *first_byte_ptr = num; ++hdr; - for (num = 0; ISDIGIT (*hdr); hdr++) + for (num = 0; c_isdigit (*hdr); hdr++) num = 10 * num + (*hdr - '0'); - if (*hdr != '/' || !ISDIGIT (*(hdr + 1))) + if (*hdr != '/' || !c_isdigit (*(hdr + 1))) return false; *last_byte_ptr = num; ++hdr; - for (num = 0; ISDIGIT (*hdr); hdr++) - num = 10 * num + (*hdr - '0'); + if (*hdr == '*') + num = -1; + else + for (num = 0; c_isdigit (*hdr); hdr++) + num = 10 * num + (*hdr - '0'); *entity_length_ptr = num; return true; } @@ -939,7 +942,7 @@ extract_param (const char **source, param_token *name, param_token *value, { const char *p = *source; - while (ISSPACE (*p)) ++p; + while (c_isspace (*p)) ++p; if (!*p) { *source = p; @@ -948,11 +951,11 @@ extract_param (const char **source, param_token *name, param_token *value, /* Extract name. */ name->b = p; - while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p; + while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p; name->e = p; if (name->b == name->e) return false; /* empty name: error */ - while (ISSPACE (*p)) ++p; + while (c_isspace (*p)) ++p; if (*p == separator || !*p) /* no value */ { xzero (*value); @@ -965,7 +968,7 @@ extract_param (const char **source, param_token *name, param_token *value, /* *p is '=', extract value */ ++p; - while (ISSPACE (*p)) ++p; + while (c_isspace (*p)) ++p; if (*p == '"') /* quoted */ { value->b = ++p; @@ -974,7 +977,7 @@ extract_param (const char **source, param_token *name, param_token *value, return false; value->e = p++; /* Currently at closing quote; find the end of param. */ - while (ISSPACE (*p)) ++p; + while (c_isspace (*p)) ++p; while (*p && *p != separator) ++p; if (*p == separator) ++p; @@ -987,7 +990,7 @@ extract_param (const char **source, param_token *name, param_token *value, value->b = p; while (*p && *p != separator) ++p; value->e = p; - while (value->e != value->b && ISSPACE (value->e[-1])) + while (value->e != value->b && c_isspace (value->e[-1])) --value->e; if (*p == separator) ++p; } @@ -1315,7 +1318,7 @@ free_hstat (struct http_stat *hs) #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ + && (c_isspace (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) #define SET_USER_AGENT(req) do { \ @@ -1622,14 +1625,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) only hurts us. */ request_remove_header (req, "Authorization"); } - } - - if (sock < 0) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. */ - if (host_lookup_failed) + else if (host_lookup_failed) { request_free (req); logprintf(LOG_NOTQUIET, @@ -1637,7 +1633,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) exec_name, relevant->host); return HOSTERR; } + } + if (sock < 0) + { sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) { @@ -2025,7 +2024,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); char *tmp = strchr (type, ';'); if (tmp) { - while (tmp > type && ISSPACE (tmp[-1])) + while (tmp > type && c_isspace (tmp[-1])) --tmp; *tmp = '\0'; } @@ -2056,7 +2055,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); wgint first_byte_pos, last_byte_pos, entity_length; if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, &entity_length)) - contrange = first_byte_pos; + { + contrange = first_byte_pos; + contlen = last_byte_pos - first_byte_pos + 1; + } } resp_free (resp); @@ -2156,7 +2158,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); CLOSE_INVALIDATE (sock); return RANGEERR; } - hs->contlen = contlen + contrange; + if (contlen == -1) + hs->contlen = -1; + else + hs->contlen = contlen + contrange; if (opt.verbose) { @@ -2348,6 +2353,31 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, hstat.local_file = xstrdup (opt.output_document); got_name = true; } + else if (!opt.content_disposition) + { + hstat.local_file = url_file_name (u); + got_name = true; + } + + /* TODO: Ick! This code is now in both gethttp and http_loop, and is + * screaming for some refactoring. */ + if (got_name && file_exists_p (hstat.local_file) && opt.noclobber) + { + /* If opt.noclobber is turned on and file already exists, do not + retrieve the file */ + logprintf (LOG_VERBOSE, _("\ +File `%s' already there; not retrieving.\n\n"), + hstat.local_file); + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; + + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (hstat.local_file)) + *dt |= TEXTHTML; + + return RETRUNNEEDED; + } /* Reset the counter. */ count = 0; @@ -2542,9 +2572,9 @@ Remote file does not exist -- broken link!!!\n")); } /* Did we get the time-stamp? */ - if (send_head_first && !got_head) + if (!got_head) { - bool restart_loop = false; + got_head = true; /* no more time-stamping */ if (opt.timestamping && !hstat.remote_time) { @@ -2562,92 +2592,87 @@ Last-modified header invalid -- time-stamp ignored.\n")); time_came_from_head = true; } - /* The time-stamping section. */ - if (opt.timestamping) + if (send_head_first) { - if (hstat.orig_file_name) /* Perform the following checks only - if the file we're supposed to - download already exists. */ + /* The time-stamping section. */ + if (opt.timestamping) { - if (hstat.remote_time && - tmr != (time_t) (-1)) + if (hstat.orig_file_name) /* Perform the following + checks only if the file + we're supposed to + download already exists. */ { - /* Now time-stamping can be used validly. Time-stamping - means that if the sizes of the local and remote file - match, and local file is newer than the remote file, - it will not be retrieved. Otherwise, the normal - download procedure is resumed. */ - if (hstat.orig_file_tstamp >= tmr) + if (hstat.remote_time && + tmr != (time_t) (-1)) { - if (hstat.contlen == -1 - || hstat.orig_file_size == hstat.contlen) + /* Now time-stamping can be used validly. + Time-stamping means that if the sizes of + the local and remote file match, and local + file is newer than the remote file, it will + not be retrieved. Otherwise, the normal + download procedure is resumed. */ + if (hstat.orig_file_tstamp >= tmr) { - logprintf (LOG_VERBOSE, _("\ + if (hstat.contlen == -1 + || hstat.orig_file_size == hstat.contlen) + { + logprintf (LOG_VERBOSE, _("\ Server file no newer than local file `%s' -- not retrieving.\n\n"), - hstat.orig_file_name); - ret = RETROK; - goto exit; - } - else - { - logprintf (LOG_VERBOSE, _("\ + hstat.orig_file_name); + ret = RETROK; + goto exit; + } + else + { + logprintf (LOG_VERBOSE, _("\ The sizes do not match (local %s) -- retrieving.\n"), - number_to_static_string (hstat.orig_file_size)); + number_to_static_string (hstat.orig_file_size)); + } } - } - else - logputs (LOG_VERBOSE, - _("Remote file is newer, retrieving.\n")); + else + logputs (LOG_VERBOSE, + _("Remote file is newer, retrieving.\n")); - logputs (LOG_VERBOSE, "\n"); + logputs (LOG_VERBOSE, "\n"); + } } + + /* free_hstat (&hstat); */ + hstat.timestamp_checked = true; } - /* free_hstat (&hstat); */ - hstat.timestamp_checked = true; - restart_loop = true; - } - - if (opt.spider) - { - if (opt.recursive) + if (opt.spider) { - if (*dt & TEXTHTML) + if (opt.recursive) { - logputs (LOG_VERBOSE, _("\ + if (*dt & TEXTHTML) + { + logputs (LOG_VERBOSE, _("\ Remote file exists and could contain links to other resources -- retrieving.\n\n")); - restart_loop = true; + } + else + { + logprintf (LOG_VERBOSE, _("\ +Remote file exists but does not contain any link -- not retrieving.\n\n")); + ret = RETROK; /* RETRUNNEEDED is not for caller. */ + goto exit; + } } - else + else { logprintf (LOG_VERBOSE, _("\ -Remote file exists but does not contain any link -- not retrieving.\n\n")); +Remote file exists but recursion is disabled -- not retrieving.\n\n")); ret = RETROK; /* RETRUNNEEDED is not for caller. */ goto exit; } } - else - { - logprintf (LOG_VERBOSE, _("\ -Remote file exists but recursion is disabled -- not retrieving.\n\n")); - ret = RETROK; /* RETRUNNEEDED is not for caller. */ - goto exit; - } - } - if (send_head_first) - { got_name = true; - restart_loop = true; - } - - got_head = true; /* no more time-stamping */ - *dt &= ~HEAD_ONLY; - count = 0; /* the retrieve count for HEAD is reset */ - - if (restart_loop) - continue; - } + *dt &= ~HEAD_ONLY; + count = 0; /* the retrieve count for HEAD is reset */ + continue; + } /* send_head_first */ + } /* !got_head */ if ((tmr != (time_t) (-1)) && ((hstat.len == hstat.contlen) || @@ -2800,11 +2825,11 @@ check_end (const char *p) { if (!p) return false; - while (ISSPACE (*p)) + while (c_isspace (*p)) ++p; if (!*p || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T') - || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1]))) + || ((p[0] == '+' || p[0] == '-') && c_isdigit (p[1]))) return true; else return false; @@ -2920,7 +2945,7 @@ basic_authentication_encode (const char *user, const char *passwd) } #define SKIP_WS(x) do { \ - while (ISSPACE (*(x))) \ + while (c_isspace (*(x))) \ ++(x); \ } while (0) @@ -3052,7 +3077,7 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", ((e) - (b) >= STRSIZE (literal) \ && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ && ((e) - (b) == STRSIZE (literal) \ - || ISSPACE (b[STRSIZE (literal)]))) + || c_isspace (b[STRSIZE (literal)]))) static bool known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) @@ -3081,7 +3106,7 @@ create_authorization_line (const char *au, const char *user, { /* We are called only with known schemes, so we can dispatch on the first letter. */ - switch (TOUPPER (*au)) + switch (c_toupper (*au)) { case 'B': /* Basic */ *finished = true;