int auth_tried_already;
struct rbuf rbuf;
- /* Let the others worry about local filename... */
if (!(*dt & HEAD_ONLY))
+ /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
+ know the local filename so we can save to it. */
assert (u->local != NULL);
authenticate_h = 0;
/* We need to come back here when the initial attempt to retrieve
without authorization header fails. */
- /* Initialize certain elements of struct hstat. */
+ /* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
hs->res = -1;
/* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
+ if (opt.html_extension && (*dt & TEXTHTML))
+ /* -E / --html-extension / html_extension = on was specified, and this is a
+ text/html file. If some case-insensitive variation on ".htm[l]" isn't
+ already the file's suffix, tack on ".html".
+ The suffix is taken to be everything from the last '.' found anywhere in
+ u->local by strrchr(); a name containing no '.' at all also gets ".html"
+ appended. sizeof(".html") counts the terminating NUL, so the xrealloc()
+ below requests exactly the old length plus the new suffix. xrealloc() may
+ move the buffer, so any copy of the old u->local pointer held elsewhere
+ is stale afterwards. ADDED_HTML_EXTENSION is set in *dt only when the
+ name was actually changed. */
+ {
+ char* last_period_in_local_filename = strrchr(u->local, '.');
+
+ if (last_period_in_local_filename == NULL ||
+ !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
+ strcasecmp(last_period_in_local_filename, ".html") == EQ))
+ {
+ size_t local_filename_len = strlen(u->local);
+
+ u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
+ strcpy(u->local + local_filename_len, ".html");
+
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+ }
+
if (contrange == -1)
hs->restval = 0;
else if (contrange != hs->restval ||
return FOPENERR;
}
}
- else /* opt.dfp */
- fp = opt.dfp;
+ else /* opt.dfp */
+ {
+ fp = opt.dfp;
+ if (!hs->restval)
+ {
+ /* hs->restval is zero here (presumably meaning we are not
+ continuing a partial download -- TODO confirm), so move
+ back to the start of the stream and clear its error and
+ end-of-file indicators before writing.
+ This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
(contlen != -1 ? contlen : 0),
&rbuf);
hs->dltime = elapsed_time ();
- if (!opt.dfp)
- fclose (fp);
- else
- fflush (fp);
+ {
+ /* Close or flush the file. We have to be careful to check for
+ error here. Checking the result of fwrite() is not enough --
+ errors could go unnoticed! With a buffered stream the data may
+ only reach the file when it is flushed, so a write failure can
+ first show up at this point; both fclose() and fflush() return
+ EOF on failure. When opt.dfp is set, fp is opt.dfp itself, so
+ it is only flushed and left open. A failure is recorded as
+ hs->res = -2, which is tested right after the socket is
+ closed. */
+ int flush_res;
+ if (!opt.dfp)
+ flush_res = fclose (fp);
+ else
+ flush_res = fflush (fp);
+ if (flush_res == EOF)
+ hs->res = -2;
+ }
FREE_MAYBE (all_headers);
CLOSE (sock);
if (hs->res == -2)
static int first_retrieval = 1;
int count;
- int local_dot_orig_file_exists = FALSE;
int use_ts, got_head = 0; /* time-stamping info */
+ char *filename_plus_orig_suffix;
+ char *local_filename = NULL;
char *tms, *suf, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
+ size_t filename_len;
struct http_stat hstat; /* HTTP status */
struct stat st;
else
locf = opt.output_document;
+ /* Allocate room for u->local plus a ".orig" suffix (sizeof(".orig")
+ includes the terminating NUL). The buffer is only filled in on the
+ opt.timestamping / opt.backup_converted path below, but it is allocated
+ unconditionally, so we need to remember to free() it before EACH return
+ from this function. Multiple returns make that easy to get wrong; this
+ is one reason it's better to set flags that influence flow control and
+ then return once at the end. */
+ filename_len = strlen(u->local);
+ filename_plus_orig_suffix = xmalloc(filename_len + sizeof(".orig"));
+
if (opt.noclobber && file_exists_p (u->local))
{
/* If opt.noclobber is turned on and file already exists, do not
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
free (suf);
+ free(filename_plus_orig_suffix); /* must precede every return! */
/* Another harmless lie: */
return RETROK;
}
use_ts = 0;
if (opt.timestamping)
{
- boolean local_file_exists = FALSE;
+ boolean local_dot_orig_file_exists = FALSE;
if (opt.backup_converted)
/* If -K is specified, we'll act on the assumption that it was specified
_wasn't_ specified last time, or the server contains files called
*.orig, -N will be back to not operating correctly with -k. */
{
- size_t filename_len = strlen(u->local);
- char* filename_plus_orig_suffix = malloc(filename_len +
- sizeof(".orig"));
-
/* Would a single s[n]printf() call be faster? */
strcpy(filename_plus_orig_suffix, u->local);
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
/* Try to stat() the .orig file. */
if (stat(filename_plus_orig_suffix, &st) == 0)
{
- local_file_exists = TRUE;
local_dot_orig_file_exists = TRUE;
+ local_filename = filename_plus_orig_suffix;
}
-
- free(filename_plus_orig_suffix);
}
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>.
NOTE(review): on success local_filename aliases u->local rather than
owning a copy. gethttp() may xrealloc() u->local to append ".html", so
confirm this pointer cannot be stale by the time it is printed in the
"no newer than local file" message. */
if (stat (u->local, &st) == 0)
- local_file_exists = TRUE;
+ local_filename = u->local;
- if (local_file_exists)
+ if (local_filename != NULL)
/* There was a local file, so we'll check later to see if the version
the server has is the same version we already have, allowing us to
skip a download. */
/* Try fetching the document, or at least its head. :-) */
err = gethttp (u, &hstat, dt);
+
+ /* It's unfortunate that wget determines the local filename before finding
+ out the Content-Type of the file. Barring a major restructuring of the
+ code, we need to re-set locf here, since gethttp() may have xrealloc()d
+ u->local to tack on ".html". */
+ if (!opt.output_document)
+ locf = u->local;
+ else
+ locf = opt.output_document;
+
/* Time? */
tms = time_str (NULL);
/* Get the new location (with or without the redirection). */
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case FWRITEERR: case FOPENERR:
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno));
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case NEWLOCATION:
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return NEWLOCATION;
break;
case RETRFINISHED:
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
if (tml >= tmr &&
(hstat.contlen == -1 || local_size == hstat.contlen))
{
- if (local_dot_orig_file_exists)
- /* We can't collapse this down into just one logprintf()
- call with a variable set to u->local or the .orig
- filename because we have to malloc() space for the
- latter, and because there are multiple returns above (a
- coding style no-no by many measures, for reasons such as
- this) we'd have to remember to free() the string at each
- one to avoid a memory leak. */
- logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s.orig' -- not retrieving.\n\n"),
- u->local);
- else
- logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
+ logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file `%s' -- not retrieving.\n\n"),
+ local_filename);
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix);/*must precede every return!*/
return RETROK;
}
else if (tml >= tmr)
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
tms, u->url, hstat.len, hstat.contlen, locf, count);
}
++opt.numurls;
- opt.downloaded += hstat.len;
- downloaded_file(ADD_FILE, locf);
+ downloaded_increase (hstat.len);
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.res == 0) /* No read error */
tms, u->url, hstat.len, locf, count);
}
++opt.numurls;
- opt.downloaded += hstat.len;
- downloaded_file(ADD_FILE, locf);
+ downloaded_increase (hstat.len);
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
"%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
- opt.downloaded += hstat.len;
- downloaded_file(ADD_FILE, locf);
+ downloaded_increase (hstat.len);
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else /* the same, but not accepted */
break;
}
while (!opt.ntry || (count < opt.ntry));
+ free(filename_plus_orig_suffix); /* must precede every return! */
return TRYLIMEXC;
}
\f