/* HTTP support.
- Copyright (C) 1996-2005 Free Software Foundation, Inc.
+ Copyright (C) 1996-2006 Free Software Foundation, Inc.
This file is part of GNU Wget.
--e;
/* This is safe even on printfs with broken handling of "%.<n>s"
because resp->headers ends with \0. */
- logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
+ logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
}
}
#undef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
+/* Parse the contents of the `Content-Disposition' header, extracting
+ the information useful to Wget. Content-Disposition is a header
+ borrowed from MIME; when used in HTTP, it typically serves for
+ specifying the desired file name of the resource. For example:
+
+ Content-Disposition: attachment; filename="flora.jpg"
+
+ Wget will skip the tokens it doesn't care about, such as
+ "attachment" in the previous example; it will also skip other
+ unrecognized params. If the header is syntactically correct and
+ contains a file name, a copy of the file name is stored in
+ *filename and true is returned. Otherwise, the function returns
+ false.
+
+ The file name is stripped of directory components and must not be
+ empty. */
+
static bool
parse_content_disposition (const char *hdr, char **filename)
{
/* Determine the local filename if needed. Notice that if -O is used
* hstat.local_file is set by http_loop to the argument of -O. */
- if (!hs->local_file)
+ if (!hs->local_file)
{
/* Honor Content-Disposition whenever possible. */
- if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
+ if (!opt.content_disposition
+ || !resp_header_copy (resp, "Content-Disposition",
+ hdrval, sizeof (hdrval))
|| !parse_content_disposition (hdrval, &hs->local_file))
{
- /* Choose filename according to URL name. */
+ /* The Content-Disposition header is missing or broken.
+ * Choose unique file name according to given URL. */
hs->local_file = url_file_name (u);
}
}
+ DEBUGP (("hs->local_file is: %s %s\n", hs->local_file,
+ file_exists_p (hs->local_file) ? "(existing)" : "(not existing)"));
+
/* TODO: perform this check only once. */
- if (opt.noclobber && file_exists_p (hs->local_file))
+ if (file_exists_p (hs->local_file))
{
- /* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
- logprintf (LOG_VERBOSE, _("\
+ if (opt.noclobber)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file */
+ logprintf (LOG_VERBOSE, _("\
File `%s' already there; not retrieving.\n\n"), hs->local_file);
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hs->local_file))
- *dt |= TEXTHTML;
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hs->local_file))
+ *dt |= TEXTHTML;
- return RETROK;
+ return RETROK;
+ }
+ else
+ {
+ char *unique = unique_name (hs->local_file, true);
+ if (unique != hs->local_file)
+ xfree (hs->local_file);
+ hs->local_file = unique;
+ }
}
/* Support timestamping */
content-type. */
if (!type ||
0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
- 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
*dt &= ~TEXTHTML;
+ /* Trace whether the TEXTHTML bit ended up set; the bit is tested with `&'. */
+ DEBUGP (("TEXTHTML is %s.\n", *dt & TEXTHTML ? "on": "off"));
+
if (opt.html_extension && (*dt & TEXTHTML))
/* -E / --html-extension / html_extension = on was specified, and this is a
text/html file. If some case-insensitive variation on ".htm[l]" isn't
return RETRFINISHED;
}
- /* Print fetch message, if opt.verbose. */
- if (opt.verbose)
- {
- logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
- HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
- }
-
/* Open the local file. */
if (!output_stream)
{
else
fp = output_stream;
+ /* Print fetch message, if opt.verbose. */
+ if (opt.verbose)
+ {
+ logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
+ HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
+ }
+
/* This confuses the timestamping code that checks for file size.
#### The timestamping code should be smarter about file size. */
if (opt.save_headers && hs->restval == 0)
bool got_head = false; /* used for time-stamping */
char *tms;
const char *tmrate;
- uerr_t err;
+ uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
wgint local_size = 0; /* the size of the local file */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, the HEAD_ONLY command is
encoded within *dt. */
- if (opt.spider || (opt.timestamping && !got_head))
+ if ((opt.spider && !opt.recursive) || (opt.timestamping && !got_head))
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
we require a fresh get.
b) caching is explicitly inhibited. */
if ((proxy && count > 1) /* a */
- || !opt.allow_cache /* b */
- )
+ || !opt.allow_cache) /* b */
*dt |= SEND_NOCACHE;
else
*dt &= ~SEND_NOCACHE;
/* Non-fatal errors continue executing the loop, which will
bring them to "while" statement at the end, to judge
whether the number of tries was exceeded. */
- /* free_hstat (&hstat); */
printwhat (count, opt.ntry);
continue;
- case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
- case SSLINITFAILED: case CONTNOTSUPPORTED:
- /* Fatal errors just return from the function. */
- free_hstat (&hstat);
- return err;
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
hstat.local_file, strerror (errno));
- free_hstat (&hstat);
- return err;
+ /* Fall through: after logging, share the fatal-error exit below. */
+ case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
+ case SSLINITFAILED: case CONTNOTSUPPORTED:
+ /* Fatal errors just return from the function. */
+ ret = err;
+ goto exit;
case CONSSLERR:
/* Another fatal error. */
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
- free_hstat (&hstat);
- return err;
+ ret = err;
+ goto exit;
case NEWLOCATION:
/* Return the new location to the caller. */
if (!*newloc)
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
- free_hstat (&hstat);
- return WRONGCODE;
+ ret = WRONGCODE;
}
- free_hstat (&hstat);
- return NEWLOCATION;
+ else
+ {
+ ret = NEWLOCATION;
+ }
+ goto exit;
case RETRUNNEEDED:
/* The file was already fully retrieved. */
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
case RETRFINISHED:
/* Deal with you later. */
break;
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
+ char *hurl = NULL;
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- char *hurl = url_string (u, true);
+ hurl = url_string (u, true);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
- xfree (hurl);
+ }
+ if (opt.spider && opt.recursive)
+ {
+ if (!hurl) hurl = url_string (u, true);
+ nonexisting_url (hurl, referer);
}
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
tms, hstat.statcode, escnonprint (hstat.error));
logputs (LOG_VERBOSE, "\n");
- free_hstat (&hstat);
- return WRONGCODE;
+ ret = WRONGCODE;
+ xfree_null (hurl);
+ goto exit;
}
/* Did we get the time-stamp? */
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
hstat.orig_file_name);
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
}
else
{
}
if ((tmr != (time_t) (-1))
- && !opt.spider
+ && (!opt.spider || opt.recursive)
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
}
/* End of time-stamping section. */
- if (opt.spider)
+ if (opt.spider && !opt.recursive)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
escnonprint (hstat.error));
- return RETROK;
+ ret = RETROK;
+ goto exit;
}
tmrate = retr_rate (hstat.rd_size, hstat.dltime);
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
}
else if (hstat.res == 0) /* No read error */
{
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
connection too soon */
_("%s (%s) - Connection closed at byte %s. "),
tms, tmrate, number_to_static_string (hstat.len));
printwhat (count, opt.ntry);
- /* free_hstat (&hstat); */
continue;
}
else
tms, tmrate, number_to_static_string (hstat.len),
hstat.rderrmsg);
printwhat (count, opt.ntry);
- /* free_hstat (&hstat); */
continue;
}
else /* hstat.res == -1 and contlen is given */
number_to_static_string (hstat.contlen),
hstat.rderrmsg);
printwhat (count, opt.ntry);
- /* free_hstat (&hstat); */
continue;
}
}
/* not reached */
}
while (!opt.ntry || (count < opt.ntry));
+
+exit:
+ /* Common exit path: RET holds the status to return. On success,
+ hand a copy of the final file name back to the caller, but only
+ when the caller supplied a place to store it -- LOCAL_FILE is
+ allowed to be NULL (see the assertion at the top of the function). */
+ if (ret == RETROK && local_file)
+ *local_file = xstrdup (hstat.local_file);
+ free_hstat (&hstat);
- return TRYLIMEXC;
+ return ret;
}
\f
/* Check whether the result of strptime() indicates success.
#ifdef TESTING
-char *
+const char *
test_parse_content_disposition()
{
int i;
res == test_array[i].result
&& (res == false
|| 0 == strcmp (test_array[i].filename, filename)));
-
- /* printf ("test %d: %s\n", i, res == false ? "false" : filename); */
}
return NULL;