/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <time.h>
char *line = fd_read_line (fd);
char *endl;
if (line == NULL)
- {
- ret = -1;
- break;
- }
+ break;
remaining_chunk_size = strtol (line, &endl, 16);
if (remaining_chunk_size == 0)
{
- ret = 0;
- if (fd_read_line (fd) == NULL)
- ret = -1;
+ fd_read_line (fd);
break;
}
}
false.
The file name is stripped of directory components and must not be
- empty. */
+ empty.
+
+ Historically, this function returned filename prefixed with opt.dir_prefix,
+ now that logic is handled by the caller, new code should pay attention,
+ changed by crq, Sep 2010.
+
+*/
static bool
parse_content_disposition (const char *hdr, char **filename)
{
- *filename = NULL;
param_token name, value;
+ *filename = NULL;
while (extract_param (&hdr, &name, &value, ';'))
{
int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
if ( isFilename && value.b != NULL)
- {
- /* Make the file name begin at the last slash or backslash. */
- const char *last_slash = memrchr (value.b, '/', value.e - value.b);
- const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
- if (last_slash && last_bs)
- value.b = 1 + MAX (last_slash, last_bs);
- else if (last_slash || last_bs)
- value.b = 1 + (last_slash ? last_slash : last_bs);
- if (value.b == value.e)
- continue;
- /* Start with the directory prefix, if specified. */
- if (opt.dir_prefix)
- {
- if (!(*filename))
- {
- int prefix_length = strlen (opt.dir_prefix);
- bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
- int total_length;
-
- if (add_slash)
- ++prefix_length;
- total_length = prefix_length + (value.e - value.b);
- *filename = xmalloc (total_length + 1);
- strcpy (*filename, opt.dir_prefix);
- if (add_slash)
- (*filename)[prefix_length - 1] = '/';
- memcpy (*filename + prefix_length, value.b, (value.e - value.b));
- (*filename)[total_length] = '\0';
- }
- else
- {
- append_value_to_filename (filename, &value);
- }
- }
- else
- {
- if (*filename)
- {
- append_value_to_filename (filename, &value);
- }
- else
- {
- *filename = strdupdelim (value.b, value.e);
- }
- }
- }
+ {
+ /* Make the file name begin at the last slash or backslash. */
+ const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+ const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+ if (last_slash && last_bs)
+ value.b = 1 + MAX (last_slash, last_bs);
+ else if (last_slash || last_bs)
+ value.b = 1 + (last_slash ? last_slash : last_bs);
+ if (value.b == value.e)
+ continue;
+
+ if (*filename)
+ append_value_to_filename (filename, &value);
+ else
+ *filename = strdupdelim (value.b, value.e);
+ }
}
+
if (*filename)
- {
- return true;
- }
+ return true;
else
- {
- return false;
- }
+ return false;
}
\f
hs->error = NULL;
}
+static void
+get_file_flags (const char *filename, int *dt)
+{
+ logprintf (LOG_VERBOSE, _("\
+File %s already there; not retrieving.\n\n"), quote (filename));
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
+
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (filename))
+ *dt |= TEXTHTML;
+}
+
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
&& (c_isspace (line[sizeof (string_constant) - 1]) \
server, and u->url will be requested. */
static uerr_t
gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
- struct iri *iri)
+ struct iri *iri, int count)
{
struct request *req;
/* Is the server using the chunked transfer encoding? */
bool chunked_transfer_encoding = false;
- /* Whether keep-alive should be inhibited.
-
- RFC 2068 requests that 1.0 clients not send keep-alive requests
- to proxies. This is because many 1.0 proxies do not interpret
- the Connection header and transfer it to the remote server,
- causing it to not close the connection and leave both the proxy
- and the client hanging. */
+ /* Whether keep-alive should be inhibited. */
bool inhibit_keep_alive =
- !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
+ !opt.http_keep_alive || opt.ignore_length;
/* Headers sent when using POST. */
wgint post_data_size = 0;
rel_value);
}
- if (!inhibit_keep_alive)
- request_set_header (req, "Connection", "Keep-Alive", rel_none);
+ if (inhibit_keep_alive)
+ request_set_header (req, "Connection", "Close", rel_none);
+ else
+ {
+ if (proxy == NULL)
+ request_set_header (req, "Connection", "Keep-Alive", rel_none);
+ else
+ {
+ request_set_header (req, "Connection", "Close", rel_none);
+ request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
+ }
+ }
if (opt.post_data || opt.post_file_name)
{
* hstat.local_file is set by http_loop to the argument of -O. */
if (!hs->local_file)
{
+ char *local_file = NULL;
+
/* Honor Content-Disposition whether possible. */
if (!opt.content_disposition
|| !resp_header_copy (resp, "Content-Disposition",
hdrval, sizeof (hdrval))
- || !parse_content_disposition (hdrval, &hs->local_file))
+ || !parse_content_disposition (hdrval, &local_file))
{
/* The Content-Disposition header is missing or broken.
* Choose unique file name according to given URL. */
- hs->local_file = url_file_name (u);
+ hs->local_file = url_file_name (u, NULL);
+ }
+ else
+ {
+ DEBUGP (("Parsed filename from Content-Disposition: %s\n",
+ local_file));
+ hs->local_file = url_file_name (u, local_file);
}
}
/* If opt.noclobber is turned on and file already exists, do not
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
- logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"), quote (hs->local_file));
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
-
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hs->local_file))
- *dt |= TEXTHTML;
-
+ get_file_flags (hs->local_file, dt);
xfree (head);
xfree_null (message);
return RETRUNNEEDED;
CLOSE_INVALIDATE (sock);
xfree_null (type);
xfree (head);
+ /* From RFC2616: The status codes 303 and 307 have
+ been added for servers that wish to make unambiguously
+ clear which kind of reaction is expected of the client.
+
+ A 307 should be redirected using the same method,
+ in other words, a POST should be preserved and not
+ converted to a GET in that case. */
+ if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
+ return NEWLOCATION_KEEP_POST;
return NEWLOCATION;
}
}
if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
|| (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
- && contrange == 0 && hs->restval >= contlen))
+ && contrange == 0 && contlen >= 0 && hs->restval >= contlen))
{
/* If `-c' is in use and the file has been fully downloaded (or
the remote file has shrunk), Wget effectively requests bytes
fp = fopen (hs->local_file, "ab");
#endif /* def __VMS [else] */
}
- else if (ALLOW_CLOBBER)
+ else if (ALLOW_CLOBBER || count > 0)
{
+ if (opt.unlink && file_exists_p (hs->local_file))
+ {
+ int res = unlink (hs->local_file);
+ if (res < 0)
+ {
+ logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
+ strerror (errno));
+ CLOSE_INVALIDATE (sock);
+ xfree (head);
+ return UNLINKERR;
+ }
+ }
+
#ifdef __VMS
int open_id;
/* The genuine HTTP loop! This is the part where the retrieval is
retried, and retried, and retried, and... */
uerr_t
-http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy, struct iri *iri)
+http_loop (struct url *u, struct url *original_url, char **newloc,
+ char **local_file, const char *referer, int *dt, struct url *proxy,
+ struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
}
else if (!opt.content_disposition)
{
- hstat.local_file = url_file_name (u);
+ hstat.local_file =
+ url_file_name (opt.trustservernames ? u : original_url, NULL);
got_name = true;
}
- /* TODO: Ick! This code is now in both gethttp and http_loop, and is
- * screaming for some refactoring. */
if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
- logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"),
- quote (hstat.local_file));
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
-
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hstat.local_file))
- *dt |= TEXTHTML;
-
+ get_file_flags (hstat.local_file, dt);
ret = RETROK;
goto exit;
}
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- file_name = url_file_name (u);
+ file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
if (opt.timestamping && (file_exists_p (file_name)
|| opt.content_disposition))
send_head_first = true;
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy, iri);
+ err = gethttp (u, &hstat, dt, proxy, iri, count);
/* Time? */
tms = datetime_str (time (NULL));
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
ret = err;
goto exit;
+ case UNLINKERR:
+ /* Another fatal error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"),
+ quote (hstat.local_file), strerror (errno));
+ ret = err;
+ goto exit;
case NEWLOCATION:
+ case NEWLOCATION_KEEP_POST:
/* Return the new location to the caller. */
if (!*newloc)
{
}
else
{
- ret = NEWLOCATION;
+ ret = err;
}
goto exit;
case RETRUNNEEDED:
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
- downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
else
- downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
ret = RETROK;
goto exit;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
- downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
else
- downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
ret = RETROK;
goto exit;
while (!opt.ntry || (count < opt.ntry));
exit:
- if (ret == RETROK)
+ if (ret == RETROK && local_file)
*local_file = xstrdup (hstat.local_file);
free_hstat (&hstat);
if (len == 5)
{
strncpy (shortext, ext, len - 1);
- shortext[len - 2] = '\0';
+ shortext[len - 1] = '\0';
}
if (last_period_in_local_filename == NULL
int i;
struct {
char *hdrval;
- char *opt_dir_prefix;
char *filename;
bool result;
} test_array[] = {
- { "filename=\"file.ext\"", NULL, "file.ext", true },
- { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
- { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
- { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
- { "attachment", NULL, NULL, false },
- { "attachment", "somedir", NULL, false },
- { "attachement; filename*=UTF-8'en-US'hello.txt", NULL, "hello.txt", true },
- { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", NULL, "helloworld.txt", true },
+ { "filename=\"file.ext\"", "file.ext", true },
+ { "attachment; filename=\"file.ext\"", "file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+ { "attachment", NULL, false },
+ { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
+ { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true },
};
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
char *filename;
bool res;
- opt.dir_prefix = test_array[i].opt_dir_prefix;
res = parse_content_disposition (test_array[i].hdrval, &filename);
mu_assert ("test_parse_content_disposition: wrong result",