X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=67e9a989eae93fddfae02cdb73db16fb06656c74;hp=26342593ac582c381c37a3988da34046e8204ca9;hb=f2956990ca0ec026c3b7702ec1d7afbd6f9dacf9;hpb=2fe72be505d2d91fc0bbbd22cc19f3d288813671 diff --git a/src/http.c b/src/http.c index 26342593..67e9a989 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,6 @@ /* HTTP support. - Copyright (C) 1996-2006 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -1621,19 +1622,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) only hurts us. */ request_remove_header (req, "Authorization"); } - } - - if (sock < 0) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. */ - if (host_lookup_failed) + else if (host_lookup_failed) { request_free (req); + logprintf(LOG_NOTQUIET, + _("%s: unable to resolve host address `%s'\n"), + exec_name, relevant->host); return HOSTERR; } + } + if (sock < 0) + { sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) { @@ -1820,7 +1820,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); if (has_html_suffix_p (hs->local_file)) *dt |= TEXTHTML; - return RETROK; + return RETRUNNEEDED; } else if (!ALLOW_CLOBBER) { @@ -2305,14 +2305,15 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, { int count; bool got_head = false; /* used for time-stamping and filename detection */ + bool time_came_from_head = false; bool got_name = false; char *tms; const char *tmrate; uerr_t err, ret = TRYLIMEXC; time_t tmr = -1; /* remote time-stamp */ - wgint local_size = 0; /* the size of the local file */ struct http_stat hstat; /* HTTP status */ struct_stat st; + bool send_head_first = true; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); @@ -2343,6 +2344,11 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, hstat.local_file = xstrdup (opt.output_document); got_name = true; } + else if (!opt.content_disposition) + { + hstat.local_file = url_file_name (u); + got_name = true; + } /* Reset the counter. */ count = 0; @@ -2350,6 +2356,19 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Reset the document type. */ *dt = 0; + /* Skip preliminary HEAD request if we're not in spider mode AND + * if -O was given or HTTP Content-Disposition support is disabled. */ + if (!opt.spider + && (got_name || !opt.content_disposition)) + send_head_first = false; + + /* Send preliminary HEAD request if -N is given and we have an existing + * destination file. */ + if (opt.timestamping + && !opt.content_disposition + && file_exists_p (url_file_name (u))) + send_head_first = true; + /* THE loop */ do { @@ -2358,7 +2377,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, sleep_between_retrievals (count); /* Get the current time string. */ - tms = time_str (time (NULL)); + tms = datetime_str (time (NULL)); if (opt.spider && !got_head) logprintf (LOG_VERBOSE, _("\ @@ -2391,8 +2410,7 @@ Spider mode enabled. Check if remote file exists.\n")); /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (((opt.spider || opt.timestamping) && !got_head) - || (opt.always_rest && !got_name)) + if (send_head_first && !got_head) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; @@ -2428,12 +2446,12 @@ Spider mode enabled. Check if remote file exists.\n")); err = gethttp (u, &hstat, dt, proxy); /* Time? */ - tms = time_str (time (NULL)); + tms = datetime_str (time (NULL)); /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); - + switch (err) { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: @@ -2484,7 +2502,7 @@ Spider mode enabled. Check if remote file exists.\n")); /* All possibilities should have been exhausted. */ abort (); } - + if (!(*dt & RETROKF)) { char *hurl = NULL; @@ -2494,9 +2512,17 @@ Spider mode enabled. Check if remote file exists.\n")); hurl = url_string (u, URL_AUTH_HIDE_PASSWD); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); } + + /* Fall back to GET if HEAD fails with a 500 or 501 error code. */ + if (*dt & HEAD_ONLY + && (hstat.statcode == 500 || hstat.statcode == 501)) + { + got_head = true; + continue; + } /* Maybe we should always keep track of broken links, not just in * spider mode. */ - if (opt.spider) + else if (opt.spider) { /* #### Again: ugly ugly ugly! */ if (!hurl) @@ -2519,7 +2545,7 @@ Remote file does not exist -- broken link!!!\n")); /* Did we get the time-stamp? */ if (!got_head) { - bool restart_loop = false; + got_head = true; /* no more time-stamping */ if (opt.timestamping && !hstat.remote_time) { @@ -2533,94 +2559,91 @@ Last-modified header missing -- time-stamps turned off.\n")); if (tmr == (time_t) (-1)) logputs (LOG_VERBOSE, _("\ Last-modified header invalid -- time-stamp ignored.\n")); + if (*dt & HEAD_ONLY) + time_came_from_head = true; } - /* The time-stamping section. */ - if (opt.timestamping) + if (send_head_first) { - if (hstat.orig_file_name) /* Perform the following checks only - if the file we're supposed to - download already exists. */ + /* The time-stamping section. */ + if (opt.timestamping) { - if (hstat.remote_time && - tmr != (time_t) (-1)) + if (hstat.orig_file_name) /* Perform the following + checks only if the file + we're supposed to + download already exists. */ { - /* Now time-stamping can be used validly. Time-stamping - means that if the sizes of the local and remote file - match, and local file is newer than the remote file, - it will not be retrieved. Otherwise, the normal - download procedure is resumed. */ - if (hstat.orig_file_tstamp >= tmr) + if (hstat.remote_time && + tmr != (time_t) (-1)) { - if (hstat.contlen == -1 - || hstat.orig_file_size == hstat.contlen) + /* Now time-stamping can be used validly. + Time-stamping means that if the sizes of + the local and remote file match, and local + file is newer than the remote file, it will + not be retrieved. Otherwise, the normal + download procedure is resumed. */ + if (hstat.orig_file_tstamp >= tmr) { - logprintf (LOG_VERBOSE, _("\ + if (hstat.contlen == -1 + || hstat.orig_file_size == hstat.contlen) + { + logprintf (LOG_VERBOSE, _("\ Server file no newer than local file `%s' -- not retrieving.\n\n"), - hstat.orig_file_name); - ret = RETROK; - goto exit; - } - else - { - logprintf (LOG_VERBOSE, _("\ + hstat.orig_file_name); + ret = RETROK; + goto exit; + } + else + { + logprintf (LOG_VERBOSE, _("\ The sizes do not match (local %s) -- retrieving.\n"), - number_to_static_string (local_size)); + number_to_static_string (hstat.orig_file_size)); + } } - } - else - logputs (LOG_VERBOSE, - _("Remote file is newer, retrieving.\n")); + else + logputs (LOG_VERBOSE, + _("Remote file is newer, retrieving.\n")); - logputs (LOG_VERBOSE, "\n"); + logputs (LOG_VERBOSE, "\n"); + } } + + /* free_hstat (&hstat); */ + hstat.timestamp_checked = true; } - /* free_hstat (&hstat); */ - hstat.timestamp_checked = true; - restart_loop = true; - } - - if (opt.always_rest) - { - got_name = true; - restart_loop = true; - } - - if (opt.spider) - { - if (opt.recursive) + if (opt.spider) { - if (*dt & TEXTHTML) + if (opt.recursive) { - logputs (LOG_VERBOSE, _("\ + if (*dt & TEXTHTML) + { + logputs (LOG_VERBOSE, _("\ Remote file exists and could contain links to other resources -- retrieving.\n\n")); - restart_loop = true; + } + else + { + logprintf (LOG_VERBOSE, _("\ +Remote file exists but does not contain any link -- not retrieving.\n\n")); + ret = RETROK; /* RETRUNNEEDED is not for caller. */ + goto exit; + } } - else + else { logprintf (LOG_VERBOSE, _("\ -Remote file exists but does not contain any link -- not retrieving.\n\n")); - ret = RETRUNNEEDED; +Remote file exists but recursion is disabled -- not retrieving.\n\n")); + ret = RETROK; /* RETRUNNEEDED is not for caller. */ goto exit; } } - else - { - logprintf (LOG_VERBOSE, _("\ -Remote file exists but recursion is disabled -- not retrieving.\n\n")); - ret = RETRUNNEEDED; - goto exit; - } - } - got_head = true; /* no more time-stamping */ - *dt &= ~HEAD_ONLY; - count = 0; /* the retrieve count for HEAD is reset */ - - if (restart_loop) - continue; - } + got_name = true; + *dt &= ~HEAD_ONLY; + count = 0; /* the retrieve count for HEAD is reset */ + continue; + } /* send_head_first */ + } /* !got_head */ if ((tmr != (time_t) (-1)) && ((hstat.len == hstat.contlen) || @@ -2637,7 +2660,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n")); else fl = hstat.local_file; if (fl) - touch (fl, tmr); + { + time_t newtmr = -1; + /* Reparse time header, in case it's changed. */ + if (time_came_from_head + && hstat.remote_time && hstat.remote_time[0]) + { + newtmr = http_atotm (hstat.remote_time); + if (newtmr != -1) + tmr = newtmr; + } + touch (fl, tmr); + } } /* End of time-stamping section. */