char **result = (char **)arg;
char *p;
- *result = xstrdup (hdr);
p = strrchr (hdr, ';');
if (p)
{
char *authenticate_h;
char *proxyauth;
char *all_headers;
+ char *host_port;
+ int host_port_len;
int sock, hcount, num_written, all_length, remport, statcode;
long contlen, contrange;
struct urlinfo *ou;
int auth_tried_already;
struct rbuf rbuf;
- /* Let the others worry about local filename... */
if (!(*dt & HEAD_ONLY))
+ /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
+ know the local filename so we can save to it. */
assert (u->local != NULL);
authenticate_h = 0;
/* We need to come back here when the initial attempt to retrieve
without authorization header fails. */
- /* Initialize certain elements of struct hstat. */
+ /* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
hs->res = -1;
path = u->proxy->url;
else
path = u->path;
+
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
referer = NULL;
if (ou->referer)
}
remhost = ou->host;
remport = ou->port;
+
+ if (remport == 80)
+ {
+ host_port = NULL;
+ host_port_len = 0;
+ }
+ else
+ {
+ host_port = (char *)alloca (numdigit (remport) + 2);
+ host_port_len = sprintf (host_port, ":%d", remport);
+ }
+
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path)
+ strlen (useragent)
- + strlen (remhost) + numdigit (remport)
+ + strlen (remhost) + host_port_len
+ strlen (HTTP_ACCEPT)
+ (referer ? strlen (referer) : 0)
+ (wwwauth ? strlen (wwwauth) : 0)
sprintf (request, "\
%s %s HTTP/1.0\r\n\
User-Agent: %s\r\n\
-Host: %s:%d\r\n\
+Host: %s%s\r\n\
Accept: %s\r\n\
%s%s%s%s%s%s\r\n",
- command, path, useragent, remhost, remport, HTTP_ACCEPT,
- referer ? referer : "",
- wwwauth ? wwwauth : "",
- proxyauth ? proxyauth : "",
- range ? range : "",
- pragma_h,
- opt.user_header ? opt.user_header : "");
+ command, path, useragent, remhost,
+ host_port ? host_port : "",
+ HTTP_ACCEPT, referer ? referer : "",
+ wwwauth ? wwwauth : "",
+ proxyauth ? proxyauth : "",
+ range ? range : "",
+ pragma_h,
+ opt.user_header ? opt.user_header : "");
DEBUGP (("---request begin---\n%s---request end---\n", request));
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
if (num_written < 0)
{
logputs (LOG_VERBOSE, _("Failed writing HTTP request.\n"));
- free (request);
CLOSE (sock);
return WRITEFAILED;
}
/* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
+ if (opt.html_extension && (*dt & TEXTHTML))
+ /* -E / --html-extension / html_extension = on was specified, and this is a
+ text/html file. If some case-insensitive variation on ".htm[l]" isn't
+ already the file's suffix, tack on ".html". */
+ {
+ char* last_period_in_local_filename = strrchr(u->local, '.');
+
+ if (last_period_in_local_filename == NULL ||
+ !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
+ strcasecmp(last_period_in_local_filename, ".html") == EQ))
+ {
+ size_t local_filename_len = strlen(u->local);
+
+ u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
+ strcpy(u->local + local_filename_len, ".html");
+
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+ }
+
if (contrange == -1)
hs->restval = 0;
else if (contrange != hs->restval ||
return FOPENERR;
}
}
- else /* opt.dfp */
- fp = opt.dfp;
+ else /* opt.dfp */
+ {
+ fp = opt.dfp;
+ if (!hs->restval)
+ {
+ /* This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
(contlen != -1 ? contlen : 0),
&rbuf);
hs->dltime = elapsed_time ();
- if (!opt.dfp)
- fclose (fp);
- else
- fflush (fp);
+ {
+ /* Close or flush the file. We have to be careful to check for
+ error here. Checking the result of fwrite() is not enough --
+ errors could go unnoticed! */
+ int flush_res;
+ if (!opt.dfp)
+ flush_res = fclose (fp);
+ else
+ flush_res = fflush (fp);
+ if (flush_res == EOF)
+ hs->res = -2;
+ }
FREE_MAYBE (all_headers);
CLOSE (sock);
if (hs->res == -2)
int count;
int use_ts, got_head = 0; /* time-stamping info */
+ char *filename_plus_orig_suffix;
+ char *local_filename = NULL;
char *tms, *suf, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
+ size_t filename_len;
struct http_stat hstat; /* HTTP status */
struct stat st;
*newloc = NULL;
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use
- has_wildcards_p because it would also warn on `?', and we that
+ has_wildcards_p because it would also warn on `?', and we know that
shows up in CGI paths a *lot*. */
if (strchr (u->url, '*'))
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
else
locf = opt.output_document;
+  /* NOTE: this function returns from many points below, and the buffer
+     xmalloc()ed here must be free()d before EVERY one of those returns.
+     Setting a status flag and returning once at the end would be less
+     error-prone; until then, each return site carries an explicit free(). */
+ filename_len = strlen(u->local);
+ filename_plus_orig_suffix = xmalloc(filename_len + sizeof(".orig"));
+
if (opt.noclobber && file_exists_p (u->local))
{
/* If opt.noclobber is turned on and file already exists, do not
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
free (suf);
+ free(filename_plus_orig_suffix); /* must precede every return! */
/* Another harmless lie: */
return RETROK;
}
use_ts = 0;
if (opt.timestamping)
{
- if (stat (u->local, &st) == 0)
+ boolean local_dot_orig_file_exists = FALSE;
+
+ if (opt.backup_converted)
+ /* If -K is specified, we'll act on the assumption that it was specified
+ last time these files were downloaded as well, and instead of just
+ comparing local file X against server file X, we'll compare local
+ file X.orig (if extant, else X) against server file X. If -K
+ _wasn't_ specified last time, or the server contains files called
+ *.orig, -N will be back to not operating correctly with -k. */
+ {
+	  /* Build "<local>.orig" with two strcpy()s, reusing the cached
+	     filename_len instead of a single sprintf() call.  */
+ strcpy(filename_plus_orig_suffix, u->local);
+ strcpy(filename_plus_orig_suffix + filename_len, ".orig");
+
+ /* Try to stat() the .orig file. */
+ if (stat(filename_plus_orig_suffix, &st) == 0)
+ {
+ local_dot_orig_file_exists = TRUE;
+ local_filename = filename_plus_orig_suffix;
+ }
+ }
+
+ if (!local_dot_orig_file_exists)
+ /* Couldn't stat() <file>.orig, so try to stat() <file>. */
+ if (stat (u->local, &st) == 0)
+ local_filename = u->local;
+
+ if (local_filename != NULL)
+ /* There was a local file, so we'll check later to see if the version
+ the server has is the same version we already have, allowing us to
+ skip a download. */
{
use_ts = 1;
tml = st.st_mtime;
/* Increment the pass counter. */
++count;
/* Wait before the retrieval (unless this is the very first
- retrieval). */
- if (!first_retrieval && opt.wait)
- sleep (opt.wait);
+ retrieval).
+     On a retry (count > 0), back off linearly: sleep count seconds,
+     capped at opt.waitretry.  Otherwise honor the plain opt.wait.  */
+ if (!first_retrieval && (opt.wait || (count && opt.waitretry)))
+ {
+ if (count)
+ {
+ if (count<opt.waitretry)
+ sleep(count);
+ else
+ sleep(opt.waitretry);
+ }
+ else
+ sleep (opt.wait);
+ }
if (first_retrieval)
first_retrieval = 0;
/* Get the current time string. */
/* Try fetching the document, or at least its head. :-) */
err = gethttp (u, &hstat, dt);
+
+ /* It's unfortunate that wget determines the local filename before finding
+ out the Content-Type of the file. Barring a major restructuring of the
+ code, we need to re-set locf here, since gethttp() may have xrealloc()d
+ u->local to tack on ".html". */
+ if (!opt.output_document)
+ locf = u->local;
+ else
+ locf = opt.output_document;
+
/* Time? */
tms = time_str (NULL);
/* Get the new location (with or without the redirection). */
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case FWRITEERR: case FOPENERR:
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno));
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case NEWLOCATION:
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return NEWLOCATION;
break;
case RETRFINISHED:
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
(hstat.contlen == -1 || local_size == hstat.contlen))
{
logprintf (LOG_VERBOSE, _("\
-Local file `%s' is more recent, not retrieving.\n\n"), u->local);
+Server file no newer than local file `%s' -- not retrieving.\n\n"),
+ local_filename);
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix);/*must precede every return!*/
return RETROK;
}
else if (tml >= tmr)
logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %ld), retrieving.\n"), local_size);
+The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n"));
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
}
++opt.numurls;
opt.downloaded += hstat.len;
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.res == 0) /* No read error */
{
- if (hstat.contlen == -1) /* We don't know how much we were
- supposed to get, so... */
+ if (hstat.contlen == -1) /* We don't know how much we were supposed
+ to get, so assume we succeeded. */
{
if (*dt & RETROKF)
{
}
++opt.numurls;
opt.downloaded += hstat.len;
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
opt.downloaded += hstat.len;
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else /* the same, but not accepted */
break;
}
while (!opt.ntry || (count < opt.ntry));
+ free(filename_plus_orig_suffix); /* must precede every return! */
return TRYLIMEXC;
}
\f