/* HTTP support.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <time.h>
# include "http-ntlm.h"
#endif
#include "cookies.h"
-#ifdef ENABLE_DIGEST
-# include "gen-md5.h"
-#endif
+#include "md5.h"
#include "convert.h"
#include "spider.h"
+#include "warc.h"
#ifdef TESTING
#include "test.h"
#endif
+#ifdef __VMS
+# include "vms.h"
+#endif /* def __VMS */
+
extern char *version_string;
/* Forward decls. */
#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
+#define H_10X(x) (((x) >= 100) && ((x) < 200))
#define H_20X(x) (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
int hcount, hcapacity;
};
+extern int numurls;
+
/* Create a new, empty request. At least request_set_method must be
called before the request can be used. */
p += A_len; \
} while (0)
-/* Construct the request and write it to FD using fd_write. */
+/* Construct the request and write it to FD using fd_write.
+ If warc_tmp is set to a file pointer, the request string will
+ also be written to that file. */
static int
-request_send (const struct request *req, int fd)
+request_send (const struct request *req, int fd, FILE *warc_tmp)
{
char *request_string, *p;
int i, size, write_error;
APPEND (p, req->method); *p++ = ' ';
APPEND (p, req->arg); *p++ = ' ';
- memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
+ memcpy (p, "HTTP/1.1\r\n", 10); p += 10;
for (i = 0; i < req->hcount; i++)
{
if (write_error < 0)
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
fd_errstr (fd));
+ else if (warc_tmp != NULL)
+ {
+ /* Write a copy of the data to the WARC record. */
+ int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp);
+ if (warc_tmp_written != size - 1)
+ return -2;
+ }
return write_error;
}
if (opt.auth_without_challenge)
{
- DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+ DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n"));
do_challenge = true;
}
else if (basic_authed_hosts
&& hash_table_contains(basic_authed_hosts, hostname))
{
- DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
+ DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
do_challenge = true;
}
else
{
- DEBUGP(("Host %s has not issued a general basic challenge.\n",
+ DEBUGP (("Host %s has not issued a general basic challenge.\n",
quote (hostname)));
}
if (do_challenge)
if (!hash_table_contains(basic_authed_hosts, hostname))
{
hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
- DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
+ DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname)));
}
}
/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is
- longer, read only that much; if the file is shorter, report an error. */
+ longer, read only that much; if the file is shorter, report an error.
+ If warc_tmp is set to a file pointer, the post data will
+ also be written to that file. */
static int
-post_file (int sock, const char *file_name, wgint promised_size)
+post_file (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp)
{
static char chunk[8192];
wgint written = 0;
fclose (fp);
return -1;
}
+ if (warc_tmp != NULL)
+ {
+ /* Write a copy of the data to the WARC record. */
+ int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp);
+ if (warc_tmp_written != towrite)
+ {
+ fclose (fp);
+ return -2;
+ }
+ }
written += towrite;
}
fclose (fp);
while (p < end && c_isdigit (*p))
++p;
if (p < end && *p == '.')
- ++p;
+ ++p;
while (p < end && c_isdigit (*p))
++p;
}
{
char *copy;
BOUNDED_TO_ALLOCA(b, e, copy);
- logprintf (LOG_ALWAYS, "%s%s\n", prefix,
+ logprintf (LOG_ALWAYS, "%s%s\n", prefix,
quotearg_style (escape_quoting_style, copy));
}
mode, the body is displayed for debugging purposes. */
static bool
-skip_short_body (int fd, wgint contlen)
+skip_short_body (int fd, wgint contlen, bool chunked)
{
enum {
SKIP_SIZE = 512, /* size of the download buffer */
SKIP_THRESHOLD = 4096 /* the largest size we read */
};
+ wgint remaining_chunk_size = 0;
char dlbuf[SKIP_SIZE + 1];
dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
- /* We shouldn't get here with unknown contlen. (This will change
- with HTTP/1.1, which supports "chunked" transfer.) */
- assert (contlen != -1);
+ assert (contlen != -1 || contlen);
/* If the body is too large, it makes more sense to simply close the
connection than to try to read the body. */
if (contlen > SKIP_THRESHOLD)
return false;
- DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
-
- while (contlen > 0)
+ while (contlen > 0 || chunked)
{
- int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
+ int ret;
+ if (chunked)
+ {
+ if (remaining_chunk_size == 0)
+ {
+ char *line = fd_read_line (fd);
+ char *endl;
+ if (line == NULL)
+ break;
+
+ remaining_chunk_size = strtol (line, &endl, 16);
+ if (remaining_chunk_size == 0)
+ {
+ fd_read_line (fd);
+ break;
+ }
+ }
+
+ contlen = MIN (remaining_chunk_size, SKIP_SIZE);
+ }
+
+ DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
+
+ ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
if (ret <= 0)
{
/* Don't normally report the error since this is an
return false;
}
contlen -= ret;
+
+ if (chunked)
+ {
+ remaining_chunk_size -= ret;
+ if (remaining_chunk_size == 0)
+ if (fd_read_line (fd) == NULL)
+ return false;
+ }
+
/* Safe even if %.*s bogusly expects terminating \0 because
we've zero-terminated dlbuf above. */
DEBUGP (("%.*s", ret, dlbuf));
return true;
}
+#define NOT_RFC2231 0 /* parameter name contains no '*' */
+#define RFC2231_NOENCODING 1 /* exactly one '*', and it is not the last character of the name */
+#define RFC2231_ENCODING 2 /* name ends in '*', or contains more than one '*' */
+
+/* Classify the parameter name NAME according to RFC 2231 and trim it
+   to the bare parameter name.
+
+   A name in RFC 2231 format carries '*' markers after the real name
+   that announce encoding information in the value or a fragment of a
+   long parameter value.  When such a marker is found, NAME->e is moved
+   back to the first '*', so that NAME covers only the name proper.
+
+   Returns NOT_RFC2231 if NAME contains no '*' (NAME is left untouched),
+   RFC2231_NOENCODING if it contains exactly one '*' that is not its
+   last character, and RFC2231_ENCODING otherwise.  */
+static int
+modify_param_name(param_token *name)
+{
+  const char *first_star = memchr (name->b, '*', name->e - name->b);
+  const char *last_star;
+  bool single_star, star_at_end;
+
+  if (first_star == NULL)
+    return NOT_RFC2231;
+
+  last_star = memrchr (name->b, '*', name->e - name->b);
+  single_star = (first_star == last_star);
+  /* Must be evaluated before NAME->e is moved below.  */
+  star_at_end = (first_star == name->e - 1);
+
+  name->e = first_star;
+
+  if (single_star && !star_at_end)
+    return RFC2231_NOENCODING;
+  return RFC2231_ENCODING;
+}
+
+/* Strip the RFC 2231 encoding information from the parameter value
+   VALUE, the counterpart of what modify_param_name does for the name.
+
+   ENCODING_TYPE is the classification returned by modify_param_name.
+   Only for RFC2231_ENCODING is VALUE changed: VALUE->b is advanced to
+   just past the last single quote in the value, if there is one.  A
+   value without a single quote, or an absent value (VALUE->b == NULL),
+   is left untouched.  */
+static void
+modify_param_value (param_token *value, int encoding_type )
+{
+  const char *last_quote;
+
+  if (encoding_type != RFC2231_ENCODING)
+    return;
+
+  /* A parameter may come without a value; never hand a null pointer
+     to memrchr.  */
+  if (value->b == NULL)
+    return;
+
+  last_quote = memrchr (value->b, '\'', value->e - value->b);
+  if (last_quote != NULL)
+    value->b = last_quote + 1;
+}
+
/* Extract a parameter from the string (typically an HTTP header) at
**SOURCE and advance SOURCE to the next parameter. Return false
when there are no more parameters to extract. The name of the
if (*p == separator) ++p;
}
*source = p;
+
+ int param_type = modify_param_name(name);
+ if (NOT_RFC2231 != param_type)
+ {
+ modify_param_value(value, param_type);
+ }
return true;
}
+#undef NOT_RFC2231
+#undef RFC2231_NOENCODING
+#undef RFC2231_ENCODING
+
+/* Append the bytes in [VALUE->b, VALUE->e) to the NUL-terminated
+   string *FILENAME.  *FILENAME is resized with xrealloc, so it may
+   point to a different address afterwards; it must not be NULL on
+   entry, because its current length is taken with strlen.  */
+
+static void
+append_value_to_filename (char **filename, param_token const * const value)
+{
+  /* Keep lengths in size_t: strlen and pointer differences do not
+     necessarily fit in an int.  */
+  size_t original_length = strlen (*filename);
+  size_t value_length = value->e - value->b;
+  size_t new_length = original_length + value_length;
+
+  *filename = xrealloc (*filename, new_length + 1);
+  memcpy (*filename + original_length, value->b, value_length);
+  (*filename)[new_length] = '\0';
+}
+
#undef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
false.
The file name is stripped of directory components and must not be
- empty. */
+ empty.
+
+ Historically, this function returned the file name prefixed with
+ opt.dir_prefix.  That logic is now handled by the caller; new code
+ should take this into account (changed by crq, Sep 2010).
+*/
static bool
parse_content_disposition (const char *hdr, char **filename)
{
param_token name, value;
+ *filename = NULL; /* stays NULL unless a usable "filename" parameter is found */
while (extract_param (&hdr, &name, &value, ';'))
- if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
- {
- /* Make the file name begin at the last slash or backslash. */
- const char *last_slash = memrchr (value.b, '/', value.e - value.b);
- const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
- if (last_slash && last_bs)
- value.b = 1 + MAX (last_slash, last_bs);
- else if (last_slash || last_bs)
- value.b = 1 + (last_slash ? last_slash : last_bs);
- if (value.b == value.e)
- continue;
- /* Start with the directory prefix, if specified. */
- if (opt.dir_prefix)
- {
- int prefix_length = strlen (opt.dir_prefix);
- bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
- int total_length;
-
- if (add_slash)
- ++prefix_length;
- total_length = prefix_length + (value.e - value.b);
- *filename = xmalloc (total_length + 1);
- strcpy (*filename, opt.dir_prefix);
- if (add_slash)
- (*filename)[prefix_length - 1] = '/';
- memcpy (*filename + prefix_length, value.b, (value.e - value.b));
- (*filename)[total_length] = '\0';
- }
- else
- *filename = strdupdelim (value.b, value.e);
- return true;
- }
- return false;
+ {
+ int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
+ if ( isFilename && value.b != NULL)
+ {
+ /* Make the file name begin at the last slash or backslash. */
+ const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+ const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+ if (last_slash && last_bs)
+ value.b = 1 + MAX (last_slash, last_bs);
+ else if (last_slash || last_bs)
+ value.b = 1 + (last_slash ? last_slash : last_bs);
+ if (value.b == value.e)
+ continue; /* nothing is left after the last separator; try the next parameter */
+
+ if (*filename)
+ append_value_to_filename (filename, &value); /* a later "filename" parameter extends the name collected so far */
+ else
+ *filename = strdupdelim (value.b, value.e);
+ }
+ }
+
+ if (*filename)
+ return true;
+ else
+ return false;
}
+
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
existence after having begun to download
(needed in gethttp for when connection is
interrupted/restarted. */
- bool timestamp_checked; /* true if pre-download time-stamping checks
+ bool timestamp_checked; /* true if pre-download time-stamping checks
* have already been performed */
char *orig_file_name; /* name of file to compare for time-stamping
* (might be != local_file if -K is set) */
wgint orig_file_size; /* size of file to compare for time-stamping */
- time_t orig_file_tstamp; /* time-stamp of file to compare for
+ time_t orig_file_tstamp; /* time-stamp of file to compare for
* time-stamping */
};
hs->error = NULL;
}
+/* Log that FILENAME already exists locally and is not being
+   retrieved, and add to *DT the flags describing such a file:
+   RETROKF always, plus TEXTHTML when has_html_suffix_p accepts
+   FILENAME.  Flags already present in *DT are kept.  */
+static void
+get_file_flags (const char *filename, int *dt)
+{
+  /* If the file is there, we suppose it's retrieved OK.  */
+  int found_flags = RETROKF;
+
+  logprintf (LOG_VERBOSE,
+             _("File %s already there; not retrieving.\n\n"),
+             quote (filename));
+
+  /* #### Bogusness alert: the content type is guessed from the
+     suffix ("html", "htm" or similar) alone.  */
+  if (has_html_suffix_p (filename))
+    found_flags |= TEXTHTML;
+
+  *dt |= found_flags;
+}
+
+/* Download the response body from the socket and write it to
+ an output file. The headers have already been read from the
+ socket. If WARC is enabled, the response body will also be
+ written to a WARC response record.
+
+ hs, contlen, contrange, chunked_transfer_encoding and url are
+ parameters from the gethttp method. fp is a pointer to the
+ output file.
+
+ url, warc_timestamp_str, warc_request_uuid, warc_ip, type
+ and statcode will be saved in the headers of the WARC record.
+ The head parameter contains the HTTP headers of the response.
+
+ If fp is NULL and WARC is enabled, the response body will be
+ written only to the WARC file. If WARC is disabled and fp
+ is a file pointer, the data will be written to the file.
+ If fp is a file pointer and WARC is enabled, the body will
+ be written to both destinations.
+
+ "WARC is enabled" means opt.warc_filename != NULL.
+
+ The result of fd_read_body is stored in hs->res; hs->len,
+ hs->rd_size and hs->dltime are updated as well.
+
+ Returns the error code:
+ WARC_TMP_FOPENERR if the temporary WARC file cannot be opened;
+ WARC_TMP_FWRITEERR if writing to it fails (while copying the
+ headers, or when hs->res == -3);
+ WARC_ERR if warc_write_response_record reports failure;
+ FWRITEERR if hs->res == -2;
+ RETRFINISHED otherwise. RETRFINISHED is also returned for any
+ other negative hs->res (a read error), in which case
+ hs->rderrmsg is set, so callers must inspect hs->res too. */
+static int
+read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
+ wgint contrange, bool chunked_transfer_encoding,
+ char *url, char *warc_timestamp_str, char *warc_request_uuid,
+ ip_address *warc_ip, char *type, int statcode, char *head)
+{
+ int warc_payload_offset = 0;
+ FILE *warc_tmp = NULL;
+ int warcerr = 0;
+
+ if (opt.warc_filename != NULL)
+ {
+ /* Open a temporary file where we can write the response before we
+ add it to the WARC record. */
+ warc_tmp = warc_tempfile ();
+ if (warc_tmp == NULL)
+ warcerr = WARC_TMP_FOPENERR;
+
+ if (warcerr == 0)
+ {
+ /* We should keep the response headers for the WARC record. */
+ int head_len = strlen (head);
+ int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp);
+ if (warc_tmp_written != head_len)
+ warcerr = WARC_TMP_FWRITEERR;
+ warc_payload_offset = head_len; /* the body is written right after the copied headers */
+ }
+
+ if (warcerr != 0)
+ {
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
+ return warcerr; /* nothing has been read from the socket yet */
+ }
+ }
+
+ if (fp != NULL)
+ {
+ /* This confuses the timestamping code that checks for file size.
+ #### The timestamping code should be smarter about file size. */
+ if (opt.save_headers && hs->restval == 0)
+ fwrite (head, 1, strlen (head), fp); /* NOTE(review): the result of this write is not checked */
+ }
+
+ /* Read the response body. */
+ int flags = 0;
+ if (contlen != -1)
+ /* If content-length is present, read that much; otherwise, read
+ until EOF. The HTTP spec doesn't require the server to
+ actually close the connection when it's done sending data. */
+ flags |= rb_read_exactly;
+ if (fp != NULL && hs->restval > 0 && contrange == 0)
+ /* If the server ignored our range request, instruct fd_read_body
+ to skip the first RESTVAL bytes of body. */
+ flags |= rb_skip_startpos;
+ if (chunked_transfer_encoding)
+ flags |= rb_chunked_transfer_encoding;
+
+ hs->len = hs->restval;
+ hs->rd_size = 0;
+ /* Download the response body and write it to fp.
+ If we are working on a WARC file, we simultaneously write the
+ response body to warc_tmp. */
+ hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
+ hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
+ flags, warc_tmp);
+ if (hs->res >= 0)
+ {
+ if (warc_tmp != NULL)
+ {
+ /* Create a response record and write it to the WARC file.
+ Note: per the WARC standard, the request and response should share
+ the same date header. We re-use the timestamp of the request.
+ The response record should also refer to the uuid of the request. */
+ bool r = warc_write_response_record (url, warc_timestamp_str,
+ warc_request_uuid, warc_ip,
+ warc_tmp, warc_payload_offset,
+ type, statcode, hs->newloc);
+
+ /* warc_write_response_record has closed warc_tmp. */
+
+ if (! r)
+ return WARC_ERR;
+ }
+
+ return RETRFINISHED;
+ }
+
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
+
+ if (hs->res == -2)
+ {
+ /* Error while writing to fd. */
+ return FWRITEERR;
+ }
+ else if (hs->res == -3)
+ {
+ /* Error while writing to warc_tmp. */
+ return WARC_TMP_FWRITEERR;
+ }
+ else
+ {
+ /* A read error! */
+ hs->rderrmsg = xstrdup (fd_errstr (sock));
+ return RETRFINISHED; /* the failure is reported through hs->res and hs->rderrmsg */
+ }
+}
+
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
&& (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
+#ifdef __VMS
+#define SET_USER_AGENT(req) do { \
+ if (!opt.useragent) /* nothing configured: send the default string */ \
+ request_set_header (req, "User-Agent", \
+ aprintf ("Wget/%s (VMS %s %s)", \
+ version_string, vms_arch(), vms_vers()), \
+ rel_value); \
+ else if (*opt.useragent) /* configured and non-empty; an empty string sets no header here */ \
+ request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+} while (0)
+#else /* def __VMS */
#define SET_USER_AGENT(req) do { \
if (!opt.useragent) \
request_set_header (req, "User-Agent", \
- aprintf ("Wget/%s", version_string), rel_value); \
+ aprintf ("Wget/%s (%s)", /* the default value also names OS_TYPE */ \
+ version_string, OS_TYPE), \
+ rel_value); \
else if (*opt.useragent) \
request_set_header (req, "User-Agent", opt.useragent, rel_none); \
} while (0)
+#endif /* def __VMS [else] */
/* The flags that allow clobbering the file (opening with "wb").
Defined here to avoid repetition later. #### This will require
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
-gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+ struct iri *iri, int count)
{
struct request *req;
wgint contlen, contrange;
struct url *conn;
FILE *fp;
+ int err;
int sock = -1;
- int flags;
/* Set to 1 when the authorization has already been sent and should
not be tried again. */
char hdrval[256];
char *message;
+ /* Declare WARC variables. */
+ bool warc_enabled = (opt.warc_filename != NULL);
+ FILE *warc_tmp = NULL;
+ char warc_timestamp_str [21];
+ char warc_request_uuid [48];
+ ip_address *warc_ip = NULL;
+ long int warc_payload_offset = -1;
+
/* Whether this connection will be kept alive after the HTTP request
is done. */
bool keep_alive;
- /* Whether keep-alive should be inhibited.
+ /* Is the server using the chunked transfer encoding? */
+ bool chunked_transfer_encoding = false;
- RFC 2068 requests that 1.0 clients not send keep-alive requests
- to proxies. This is because many 1.0 proxies do not interpret
- the Connection header and transfer it to the remote server,
- causing it to not close the connection and leave both the proxy
- and the client hanging. */
+ /* Whether keep-alive should be inhibited. */
bool inhibit_keep_alive =
- !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
+ !opt.http_keep_alive || opt.ignore_length;
/* Headers sent when using POST. */
wgint post_data_size = 0;
request_set_header (req, "Referer", (char *) hs->referer, rel_none);
if (*dt & SEND_NOCACHE)
- request_set_header (req, "Pragma", "no-cache", rel_none);
+ {
+ /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */
+ request_set_header (req, "Cache-Control", "no-cache, must-revalidate", rel_none);
+
+ /* ... but some HTTP/1.0 caches don't implement Cache-Control. */
+ request_set_header (req, "Pragma", "no-cache", rel_none);
+ }
if (hs->restval)
request_set_header (req, "Range",
aprintf ("bytes=%s-",
rel_value);
}
- if (!inhibit_keep_alive)
- request_set_header (req, "Connection", "Keep-Alive", rel_none);
-
- if (opt.cookies)
- request_set_header (req, "Cookie",
- cookie_header (wget_cookie_jar,
- u->host, u->port, u->path,
-#ifdef HAVE_SSL
- u->scheme == SCHEME_HTTPS
-#else
- 0
-#endif
- ),
- rel_value);
+ if (inhibit_keep_alive)
+ request_set_header (req, "Connection", "Close", rel_none);
+ else
+ {
+ if (proxy == NULL)
+ request_set_header (req, "Connection", "Keep-Alive", rel_none);
+ else
+ {
+ request_set_header (req, "Connection", "Close", rel_none);
+ request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
+ }
+ }
if (opt.post_data || opt.post_file_name)
{
rel_value);
}
+ retry_with_auth:
+ /* We need to come back here when the initial attempt to retrieve
+ without authorization header fails. (Expected to happen at least
+ for the Digest authorization scheme.) */
+
+ if (opt.cookies)
+ request_set_header (req, "Cookie",
+ cookie_header (wget_cookie_jar,
+ u->host, u->port, u->path,
+#ifdef HAVE_SSL
+ u->scheme == SCHEME_HTTPS
+#else
+ 0
+#endif
+ ),
+ rel_value);
+
/* Add the user headers. */
if (opt.user_headers)
{
request_set_user_header (req, opt.user_headers[i]);
}
- retry_with_auth:
- /* We need to come back here when the initial attempt to retrieve
- without authorization header fails. (Expected to happen at least
- for the Digest authorization scheme.) */
-
proxyauth = NULL;
if (proxy)
{
request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
}
- keep_alive = false;
+ keep_alive = true;
/* Establish the connection. */
- if (!inhibit_keep_alive)
+ if (inhibit_keep_alive)
+ keep_alive = false;
+ else
{
/* Look for a persistent connection to target host, unless a
proxy is used. The exception is when SSL is in use, in which
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- quotearg_style (escape_quoting_style, pconn.host),
+ quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
that the contents of Host would be exactly the same as
the contents of CONNECT. */
- write_error = request_send (connreq, sock);
+ write_error = request_send (connreq, sock, 0);
request_free (connreq);
if (write_error < 0)
{
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ if (statcode < 0)
+ {
+ char *tms = datetime_str (time (NULL));
+ logprintf (LOG_VERBOSE, "%d\n", statcode);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+ quotearg_style (escape_quoting_style,
+ _("Malformed status line")));
+ xfree (head);
+ return HERR;
+ }
hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
if (conn->scheme == SCHEME_HTTPS)
{
- if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
+ if (!ssl_connect_wget (sock))
{
fd_close (sock);
return CONSSLERR;
}
+ else if (!ssl_check_certificate (sock, u->host))
+ {
+ fd_close (sock);
+ return VERIFCERTERR;
+ }
using_ssl = true;
}
#endif /* HAVE_SSL */
}
+ /* Open the temporary file where we will write the request. */
+ if (warc_enabled)
+ {
+ warc_tmp = warc_tempfile ();
+ if (warc_tmp == NULL)
+ {
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ return WARC_TMP_FOPENERR;
+ }
+
+ if (! proxy)
+ {
+ warc_ip = (ip_address *) alloca (sizeof (ip_address));
+ socket_ip_address (sock, warc_ip, ENDPOINT_PEER);
+ }
+ }
+
/* Send the request to server. */
- write_error = request_send (req, sock);
+ write_error = request_send (req, sock, warc_tmp);
if (write_error >= 0)
{
{
DEBUGP (("[POST data: %s]\n", opt.post_data));
write_error = fd_write (sock, opt.post_data, post_data_size, -1);
+ if (write_error >= 0 && warc_tmp != NULL)
+ {
+ /* Remember end of headers / start of payload. */
+ warc_payload_offset = ftell (warc_tmp);
+
+ /* Write a copy of the data to the WARC record. */
+ int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp);
+ if (warc_tmp_written != post_data_size)
+ write_error = -2;
+ }
}
else if (opt.post_file_name && post_data_size != 0)
- write_error = post_file (sock, opt.post_file_name, post_data_size);
+ {
+ if (warc_tmp != NULL)
+ /* Remember end of headers / start of payload. */
+ warc_payload_offset = ftell (warc_tmp);
+
+ write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp);
+ }
}
if (write_error < 0)
{
CLOSE_INVALIDATE (sock);
request_free (req);
- return WRITEFAILED;
+
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
+
+ if (write_error == -2)
+ return WARC_TMP_FWRITEERR;
+ else
+ return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
proxy ? "Proxy" : "HTTP");
contrange = 0;
*dt &= ~RETROKF;
+
+ if (warc_enabled)
+ {
+ bool warc_result;
+ /* Generate a timestamp and uuid for this request. */
+ warc_timestamp (warc_timestamp_str);
+ warc_uuid_str (warc_request_uuid);
+
+ /* Create a request record and store it in the WARC file. */
+ warc_result = warc_write_request_record (u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip,
+ warc_tmp, warc_payload_offset);
+ if (! warc_result)
+ {
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ return WARC_ERR;
+ }
+
+ /* warc_write_request_record has also closed warc_tmp. */
+ }
+
+
+read_header:
head = read_http_response_head (sock);
if (!head)
{
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
+ if (statcode < 0)
+ {
+ char *tms = datetime_str (time (NULL));
+ logprintf (LOG_VERBOSE, "%d\n", statcode);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+ quotearg_style (escape_quoting_style,
+ _("Malformed status line")));
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ xfree (head);
+ return HERR;
+ }
+
+ if (H_10X (statcode))
+ {
+ DEBUGP (("Ignoring response\n"));
+ xfree (head);
+ goto read_header;
+ }
+
hs->message = xstrdup (message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
print_server_response (resp, " ");
}
- /* Determine the local filename if needed. Notice that if -O is used
- * hstat.local_file is set by http_loop to the argument of -O. */
- if (!hs->local_file)
+ if (!opt.ignore_length
+ && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
{
- /* Honor Content-Disposition whether possible. */
- if (!opt.content_disposition
- || !resp_header_copy (resp, "Content-Disposition",
- hdrval, sizeof (hdrval))
- || !parse_content_disposition (hdrval, &hs->local_file))
+ wgint parsed;
+ errno = 0;
+ parsed = str_to_wgint (hdrval, NULL, 10);
+ if (parsed == WGINT_MAX && errno == ERANGE)
{
- /* The Content-Disposition header is missing or broken.
- * Choose unique file name according to given URL. */
- hs->local_file = url_file_name (u);
+ /* Out of range.
+ #### If Content-Length is out of range, it most likely
+ means that the file is larger than 2G and that we're
+ compiled without LFS. In that case we should probably
+ refuse to even attempt to download the file. */
+ contlen = -1;
+ }
+ else if (parsed < 0)
+ {
+ /* Negative Content-Length; nonsensical, so we can't
+ assume any information about the content to receive. */
+ contlen = -1;
}
+ else
+ contlen = parsed;
}
-
- /* TODO: perform this check only once. */
- if (!hs->existence_checked && file_exists_p (hs->local_file))
+
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
{
- if (opt.noclobber && !opt.output_document)
+ if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
{
- /* If opt.noclobber is turned on and file already exists, do not
- retrieve the file. But if the output_document was given, then this
- test was already done and the file didn't exist. Hence the !opt.output_document */
- logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"), quote (hs->local_file));
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
-
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hs->local_file))
- *dt |= TEXTHTML;
-
- return RETRUNNEEDED;
- }
- else if (!ALLOW_CLOBBER)
- {
- char *unique = unique_name (hs->local_file, true);
- if (unique != hs->local_file)
- xfree (hs->local_file);
- hs->local_file = unique;
+ if (0 == strcasecmp (hdrval, "Close"))
+ keep_alive = false;
}
}
- hs->existence_checked = true;
- /* Support timestamping */
- /* TODO: move this code out of gethttp. */
- if (opt.timestamping && !hs->timestamp_checked)
- {
- size_t filename_len = strlen (hs->local_file);
- char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
- bool local_dot_orig_file_exists = false;
- char *local_filename = NULL;
- struct_stat st;
+ chunked_transfer_encoding = false;
+ if (resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval))
+ && 0 == strcasecmp (hdrval, "chunked"))
+ chunked_transfer_encoding = true;
- if (opt.backup_converted)
- /* If -K is specified, we'll act on the assumption that it was specified
- last time these files were downloaded as well, and instead of just
- comparing local file X against server file X, we'll compare local
- file X.orig (if extant, else X) against server file X. If -K
- _wasn't_ specified last time, or the server contains files called
- *.orig, -N will be back to not operating correctly with -k. */
- {
- /* Would a single s[n]printf() call be faster? --dan
-
- Definitely not. sprintf() is horribly slow. It's a
- different question whether the difference between the two
- affects a program. Usually I'd say "no", but at one
- point I profiled Wget, and found that a measurable and
- non-negligible amount of time was lost calling sprintf()
- in url.c. Replacing sprintf with inline calls to
- strcpy() and number_to_string() made a difference.
- --hniksic */
- memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
- memcpy (filename_plus_orig_suffix + filename_len,
- ".orig", sizeof (".orig"));
-
- /* Try to stat() the .orig file. */
- if (stat (filename_plus_orig_suffix, &st) == 0)
- {
- local_dot_orig_file_exists = true;
- local_filename = filename_plus_orig_suffix;
- }
- }
-
- if (!local_dot_orig_file_exists)
- /* Couldn't stat() <file>.orig, so try to stat() <file>. */
- if (stat (hs->local_file, &st) == 0)
- local_filename = hs->local_file;
-
- if (local_filename != NULL)
- /* There was a local file, so we'll check later to see if the version
- the server has is the same version we already have, allowing us to
- skip a download. */
- {
- hs->orig_file_name = xstrdup (local_filename);
- hs->orig_file_size = st.st_size;
- hs->orig_file_tstamp = st.st_mtime;
-#ifdef WINDOWS
- /* Modification time granularity is 2 seconds for Windows, so
- increase local time by 1 second for later comparison. */
- ++hs->orig_file_tstamp;
-#endif
- }
- }
-
- if (!opt.ignore_length
- && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
+ /* Handle (possibly multiple instances of) the Set-Cookie header. */
+ if (opt.cookies)
{
- wgint parsed;
- errno = 0;
- parsed = str_to_wgint (hdrval, NULL, 10);
- if (parsed == WGINT_MAX && errno == ERANGE)
- {
- /* Out of range.
- #### If Content-Length is out of range, it most likely
- means that the file is larger than 2G and that we're
- compiled without LFS. In that case we should probably
- refuse to even attempt to download the file. */
- contlen = -1;
- }
- else if (parsed < 0)
+ int scpos;
+ const char *scbeg, *scend;
+ /* The jar should have been created by now. */
+ assert (wget_cookie_jar != NULL);
+ for (scpos = 0;
+ (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
+ &scbeg, &scend)) != -1;
+ ++scpos)
{
- /* Negative Content-Length; nonsensical, so we can't
- assume any information about the content to receive. */
- contlen = -1;
+ char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
+ cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
+ u->path, set_cookie);
}
- else
- contlen = parsed;
}
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
- {
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
- }
- }
if (keep_alive)
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
+
+ /* Normally we are not interested in the response body.
+ But when writing a WARC file we are, because we want to keep everything. */
+ if (warc_enabled)
+ {
+ int err;
+ type = resp_header_strdup (resp, "Content-Type");
+ err = read_response_body (hs, sock, NULL, contlen, 0,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
+ xfree_null (type);
+
+ if (err != RETRFINISHED || hs->res < 0)
+ {
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ return err;
+ }
+ else
+ CLOSE_FINISH (sock);
+ }
else
- CLOSE_INVALIDATE (sock);
+ {
+ /* Since WARC is disabled, we are not interested in the response body. */
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ }
+
pconn.authorized = false;
if (!auth_finished && (user && passwd))
{
register_basic_auth_host (u->host);
}
xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
goto retry_with_auth;
}
else
}
logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
return AUTHFAILED;
}
else /* statcode != HTTP_STATUS_UNAUTHORIZED */
if (ntlm_seen)
pconn.authorized = true;
}
+
+ /* Determine the local filename if needed. Notice that if -O is used
+ * hstat.local_file is set by http_loop to the argument of -O. */
+ if (!hs->local_file)
+ {
+ char *local_file = NULL;
+
+ /* Honor Content-Disposition whenever possible. */
+ if (!opt.content_disposition
+ || !resp_header_copy (resp, "Content-Disposition",
+ hdrval, sizeof (hdrval))
+ || !parse_content_disposition (hdrval, &local_file))
+ {
+ /* The Content-Disposition header is missing or broken.
+ * Choose unique file name according to given URL. */
+ hs->local_file = url_file_name (u, NULL);
+ }
+ else
+ {
+ DEBUGP (("Parsed filename from Content-Disposition: %s\n",
+ local_file));
+ hs->local_file = url_file_name (u, local_file);
+ }
+ }
+
+ /* TODO: perform this check only once. */
+ if (!hs->existence_checked && file_exists_p (hs->local_file))
+ {
+ if (opt.noclobber && !opt.output_document)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
+ get_file_flags (hs->local_file, dt);
+ xfree (head);
+ xfree_null (message);
+ return RETRUNNEEDED;
+ }
+ else if (!ALLOW_CLOBBER)
+ {
+ char *unique = unique_name (hs->local_file, true);
+ if (unique != hs->local_file)
+ xfree (hs->local_file);
+ hs->local_file = unique;
+ }
+ }
+ hs->existence_checked = true;
+
+ /* Support timestamping */
+ /* TODO: move this code out of gethttp. */
+ if (opt.timestamping && !hs->timestamp_checked)
+ {
+ size_t filename_len = strlen (hs->local_file);
+ char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
+ bool local_dot_orig_file_exists = false;
+ char *local_filename = NULL;
+ struct_stat st;
+
+ if (opt.backup_converted)
+ /* If -K is specified, we'll act on the assumption that it was specified
+ last time these files were downloaded as well, and instead of just
+ comparing local file X against server file X, we'll compare local
+ file X.orig (if extant, else X) against server file X. If -K
+ _wasn't_ specified last time, or the server contains files called
+ *.orig, -N will be back to not operating correctly with -k. */
+ {
+ /* Would a single s[n]printf() call be faster? --dan
+
+ Definitely not. sprintf() is horribly slow. It's a
+ different question whether the difference between the two
+ affects a program. Usually I'd say "no", but at one
+ point I profiled Wget, and found that a measurable and
+ non-negligible amount of time was lost calling sprintf()
+ in url.c. Replacing sprintf with inline calls to
+ strcpy() and number_to_string() made a difference.
+ --hniksic */
+ memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
+ memcpy (filename_plus_orig_suffix + filename_len,
+ ORIG_SFX, sizeof (ORIG_SFX));
+
+ /* Try to stat() the .orig file. */
+ if (stat (filename_plus_orig_suffix, &st) == 0)
+ {
+ local_dot_orig_file_exists = true;
+ local_filename = filename_plus_orig_suffix;
+ }
+ }
+
+ if (!local_dot_orig_file_exists)
+ /* Couldn't stat() <file>.orig, so try to stat() <file>. */
+ if (stat (hs->local_file, &st) == 0)
+ local_filename = hs->local_file;
+
+ if (local_filename != NULL)
+ /* There was a local file, so we'll check later to see if the version
+ the server has is the same version we already have, allowing us to
+ skip a download. */
+ {
+ hs->orig_file_name = xstrdup (local_filename);
+ hs->orig_file_size = st.st_size;
+ hs->orig_file_tstamp = st.st_mtime;
+#ifdef WINDOWS
+ /* Modification time granularity is 2 seconds for Windows, so
+ increase local time by 1 second for later comparison. */
+ ++hs->orig_file_tstamp;
+#endif
+ }
+ }
+
request_free (req);
hs->statcode = statcode;
char *tmp = strchr (type, ';');
if (tmp)
{
+ /* sXXXav: only needed if IRI support is enabled */
+ char *tmp2 = tmp + 1;
+
while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
+
+ /* Try to get remote encoding if needed */
+ if (opt.enable_iri && !opt.encoding_remote)
+ {
+ tmp = parse_charset (tmp2);
+ if (tmp)
+ set_content_encoding (iri, tmp);
+ }
}
}
hs->newloc = resp_header_strdup (resp, "Location");
hs->remote_time = resp_header_strdup (resp, "Last-Modified");
- /* Handle (possibly multiple instances of) the Set-Cookie header. */
- if (opt.cookies)
- {
- int scpos;
- const char *scbeg, *scend;
- /* The jar should have been created by now. */
- assert (wget_cookie_jar != NULL);
- for (scpos = 0;
- (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
- &scbeg, &scend)) != -1;
- ++scpos)
- {
- char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
- cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
- u->path, set_cookie);
- }
- }
-
if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
{
wgint first_byte_pos, last_byte_pos, entity_length;
_("Location: %s%s\n"),
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
+
+ /* In case the caller cares to look... */
+ hs->len = 0;
+ hs->res = 0;
+ hs->restval = 0;
+
+ /* Normally we are not interested in the response body of a redirect.
+ But if we are writing a WARC file we are: we like to keep everything. */
+ if (warc_enabled)
+ {
+ int err = read_response_body (hs, sock, NULL, contlen, 0,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
+
+ if (err != RETRFINISHED || hs->res < 0)
+ {
+ CLOSE_INVALIDATE (sock);
+ xfree_null (type);
+ xfree (head);
+ return err;
+ }
+ else
+ CLOSE_FINISH (sock);
+ }
else
- CLOSE_INVALIDATE (sock);
+ {
+ /* Since WARC is disabled, we are not interested in the response body. */
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ }
+
xfree_null (type);
+ xfree (head);
+ /* From RFC2616: The status codes 303 and 307 have
+ been added for servers that wish to make unambiguously
+ clear which kind of reaction is expected of the client.
+
+ A 307 should be redirected using the same method,
+ in other words, a POST should be preserved and not
+ converted to a GET in that case. */
+ if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
+ return NEWLOCATION_KEEP_POST;
return NEWLOCATION;
}
}
content-type. */
if (!type ||
0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
- 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
*dt &= ~TEXTHTML;
else
*dt &= ~TEXTCSS;
- if (opt.html_extension)
+ if (opt.adjust_extension)
{
if (*dt & TEXTHTML)
- /* -E / --html-extension / html_extension = on was specified,
+ /* -E / --adjust-extension / adjust_extension = on was specified,
and this is a text/html file. If some case-insensitive
variation on ".htm[l]" isn't already the file's suffix,
tack on ".html". */
}
if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
- || (hs->restval > 0 && statcode == HTTP_STATUS_OK
- && contrange == 0 && hs->restval >= contlen)
- )
+ || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
+ && contrange == 0 && contlen >= 0 && hs->restval >= contlen))
{
/* If `-c' is in use and the file has been fully downloaded (or
the remote file has shrunk), Wget effectively requests bytes
xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
+ xfree (head);
return RETRUNNEEDED;
}
if ((contrange != 0 && contrange != hs->restval)
Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RANGEERR;
}
if (contlen == -1)
logputs (LOG_VERBOSE, "\n");
}
}
- xfree_null (type);
- type = NULL; /* We don't need it any more. */
/* Return if we have no intention of further downloading. */
- if (!(*dt & RETROKF) || head_only)
+ if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
{
/* In case the caller cares to look... */
hs->len = 0;
hs->res = 0;
- xfree_null (type);
- if (head_only)
- /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
- servers not to send body in response to a HEAD request, and
- those that do will likely be caught by test_socket_open.
- If not, they can be worked around using
- `--no-http-keep-alive'. */
- CLOSE_FINISH (sock);
- else if (keep_alive && skip_short_body (sock, contlen))
- /* Successfully skipped the body; also keep using the socket. */
- CLOSE_FINISH (sock);
+ hs->restval = 0;
+
+ /* Normally we are not interested in the response body of error responses.
+ But if we are writing a WARC file we are: we like to keep everything. */
+ if (warc_enabled)
+ {
+ int err = read_response_body (hs, sock, NULL, contlen, 0,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
+
+ if (err != RETRFINISHED || hs->res < 0)
+ {
+ CLOSE_INVALIDATE (sock);
+ xfree (head);
+ xfree_null (type);
+ return err;
+ }
+ else
+ CLOSE_FINISH (sock);
+ }
else
- CLOSE_INVALIDATE (sock);
+ {
+ /* Since WARC is disabled, we are not interested in the response body. */
+ if (head_only)
+ /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
+ servers not to send body in response to a HEAD request, and
+ those that do will likely be caught by test_socket_open.
+ If not, they can be worked around using
+ `--no-http-keep-alive'. */
+ CLOSE_FINISH (sock);
+ else if (keep_alive
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
+ /* Successfully skipped the body; also keep using the socket. */
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ }
+
+ xfree (head);
+ xfree_null (type);
return RETRFINISHED;
}
+/* 2005-06-17 SMS.
+ For VMS, define common fopen() optional arguments.
+*/
+#ifdef __VMS
+# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+# define FOPEN_BIN_FLAG 3
+#else /* def __VMS */
+# define FOPEN_BIN_FLAG true
+#endif /* def __VMS [else] */
+
/* Open the local file. */
if (!output_stream)
{
if (opt.backups)
rotate_backups (hs->local_file);
if (hs->restval)
- fp = fopen (hs->local_file, "ab");
- else if (ALLOW_CLOBBER)
- fp = fopen (hs->local_file, "wb");
+ {
+#ifdef __VMS
+ int open_id;
+
+ open_id = 21;
+ fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+ fp = fopen (hs->local_file, "ab");
+#endif /* def __VMS [else] */
+ }
+ else if (ALLOW_CLOBBER || count > 0)
+ {
+ if (opt.unlink && file_exists_p (hs->local_file))
+ {
+ int res = unlink (hs->local_file);
+ if (res < 0)
+ {
+ logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
+ strerror (errno));
+ CLOSE_INVALIDATE (sock);
+ xfree (head);
+ xfree_null (type);
+ return UNLINKERR;
+ }
+ }
+
+#ifdef __VMS
+ int open_id;
+
+ open_id = 22;
+ fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+ fp = fopen (hs->local_file, "wb");
+#endif /* def __VMS [else] */
+ }
else
{
- fp = fopen_excl (hs->local_file, true);
+ fp = fopen_excl (hs->local_file, FOPEN_BIN_FLAG);
if (!fp && errno == EEXIST)
{
/* We cannot just invent a new name and use it (which is
_("%s has sprung into existence.\n"),
hs->local_file);
CLOSE_INVALIDATE (sock);
+ xfree (head);
+ xfree_null (type);
return FOPEN_EXCL_ERR;
}
}
{
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
+ xfree (head);
+ xfree_null (type);
return FOPENERR;
}
}
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
+ logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
}
-
- /* This confuses the timestamping code that checks for file size.
- #### The timestamping code should be smarter about file size. */
- if (opt.save_headers && hs->restval == 0)
- fwrite (head, 1, strlen (head), fp);
+
+
+ err = read_response_body (hs, sock, fp, contlen, contrange,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
/* Now we no longer need to store the response header. */
xfree (head);
-
- /* Download the request body. */
- flags = 0;
- if (contlen != -1)
- /* If content-length is present, read that much; otherwise, read
- until EOF. The HTTP spec doesn't require the server to
- actually close the connection when it's done sending data. */
- flags |= rb_read_exactly;
- if (hs->restval > 0 && contrange == 0)
- /* If the server ignored our range request, instruct fd_read_body
- to skip the first RESTVAL bytes of body. */
- flags |= rb_skip_startpos;
- hs->len = hs->restval;
- hs->rd_size = 0;
- hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
- hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
- flags);
+ xfree_null (type);
if (hs->res >= 0)
CLOSE_FINISH (sock);
else
- {
- if (hs->res < 0)
- hs->rderrmsg = xstrdup (fd_errstr (sock));
- CLOSE_INVALIDATE (sock);
- }
+ CLOSE_INVALIDATE (sock);
if (!output_stream)
fclose (fp);
- if (hs->res == -2)
- return FWRITEERR;
- return RETRFINISHED;
+
+ return err;
}
/* The genuine HTTP loop! This is the part where the retrieval is
retried, and retried, and retried, and... */
uerr_t
-http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy)
+http_loop (struct url *u, struct url *original_url, char **newloc,
+ char **local_file, const char *referer, int *dt, struct url *proxy,
+ struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = true;
+ char *file_name;
+ bool force_full_retrieve = false;
+
+
+ /* If we are writing to a WARC file: always retrieve the whole file. */
+ if (opt.warc_filename != NULL)
+ force_full_retrieve = true;
+
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
-
+
/* Set LOCAL_FILE parameter. */
if (local_file && opt.output_document)
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-
+
/* Reset NEWLOC parameter. */
*newloc = NULL;
here so that we don't go through the hoops if we're just using
FTP or whatever. */
if (opt.cookies)
- load_cookies();
+ load_cookies ();
/* Warn on (likely bogus) wildcard usage in HTTP. */
if (opt.ftp_glob && has_wildcards_p (u->path))
}
else if (!opt.content_disposition)
{
- hstat.local_file = url_file_name (u);
+ hstat.local_file =
+ url_file_name (opt.trustservernames ? u : original_url, NULL);
got_name = true;
}
- /* TODO: Ick! This code is now in both gethttp and http_loop, and is
- * screaming for some refactoring. */
if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
- logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"),
- quote (hstat.local_file));
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
-
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hstat.local_file))
- *dt |= TEXTHTML;
-
+ get_file_flags (hstat.local_file, dt);
ret = RETROK;
goto exit;
}
/* Reset the counter. */
count = 0;
-
+
/* Reset the document type. */
*dt = 0;
-
- /* Skip preliminary HEAD request if we're not in spider mode AND
- * if -O was given or HTTP Content-Disposition support is disabled. */
- if (!opt.spider
- && (got_name || !opt.content_disposition))
+
+ /* Skip preliminary HEAD request if we're not in spider mode. */
+ if (!opt.spider)
send_head_first = false;
- /* Send preliminary HEAD request if -N is given and we have an existing
+ /* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
- && !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
+ if (opt.timestamping && (file_exists_p (file_name)
+ || opt.content_disposition))
send_head_first = true;
-
+ xfree (file_name);
+
/* THE loop */
do
{
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
-
+
/* Get the current time string. */
tms = datetime_str (time (NULL));
-
+
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
Spider mode enabled. Check if remote file exists.\n"));
if (opt.verbose)
{
char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-
- if (count > 1)
+
+ if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
tms, tmp, hurl);
}
- else
+ else
{
logprintf (LOG_NOTQUIET, "--%s-- %s\n",
tms, hurl);
}
-
+
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (send_head_first && !got_head)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
/* Decide whether or not to restart. */
- if (opt.always_rest
+ if (force_full_retrieve)
+ hstat.restval = hstat.len;
+ else if (opt.always_rest
&& got_name
&& stat (hstat.local_file, &st) == 0
&& S_ISREG (st.st_mode))
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy);
+ err = gethttp (u, &hstat, dt, proxy, iri, count);
/* Time? */
tms = datetime_str (time (NULL));
-
+
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
quote (hstat.local_file), strerror (errno));
- case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
- case SSLINITFAILED: case CONTNOTSUPPORTED:
+ case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
+ case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR:
/* Fatal errors just return from the function. */
ret = err;
goto exit;
+ case WARC_ERR:
+ /* A fatal WARC error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot write to WARC file..\n"));
+ ret = err;
+ goto exit;
+ case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR:
+ /* A fatal WARC error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n"));
+ ret = err;
+ goto exit;
case CONSSLERR:
/* Another fatal error. */
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
ret = err;
goto exit;
+ case UNLINKERR:
+ /* Another fatal error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"),
+ quote (hstat.local_file), strerror (errno));
+ ret = err;
+ goto exit;
case NEWLOCATION:
+ case NEWLOCATION_KEEP_POST:
/* Return the new location to the caller. */
if (!*newloc)
{
hstat.statcode);
ret = WRONGCODE;
}
- else
+ else
{
- ret = NEWLOCATION;
+ ret = err;
}
goto exit;
case RETRUNNEEDED:
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
continue;
}
/* Maybe we should always keep track of broken links, not just in
- * spider mode. */
- else if (opt.spider)
+ * spider mode.
+ * Don't log error if it was UTF-8 encoded because we will try
+ * once unencoded. */
+ else if (opt.spider && !iri->utf8_encode)
{
/* #### Again: ugly ugly ugly! */
- if (!hurl)
+ if (!hurl)
hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode,
+ tms, hstat.statcode,
quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
if (*dt & HEAD_ONLY)
time_came_from_head = true;
}
-
+
if (send_head_first)
{
/* The time-stamping section. */
we're supposed to
download already exists. */
{
- if (hstat.remote_time &&
+ if (hstat.remote_time &&
tmr != (time_t) (-1))
{
/* Now time-stamping can be used validly.
download procedure is resumed. */
if (hstat.orig_file_tstamp >= tmr)
{
- if (hstat.contlen == -1
+ if (hstat.contlen == -1
|| hstat.orig_file_size == hstat.contlen)
{
logprintf (LOG_VERBOSE, _("\
}
}
else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
+ {
+ force_full_retrieve = true;
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
+ }
logputs (LOG_VERBOSE, "\n");
}
}
-
+
/* free_hstat (&hstat); */
hstat.timestamp_checked = true;
}
-
+
if (opt.spider)
{
bool finished = true;
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
finished = false;
}
- else
+ else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but does not contain any link -- not retrieving.\n\n"));
Remote file exists and could contain further links,\n\
but recursion is disabled -- not retrieving.\n\n"));
}
- else
+ else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists.\n\n"));
}
ret = RETROK; /* RETRUNNEEDED is not for caller. */
}
-
+
if (finished)
{
- logprintf (LOG_NONVERBOSE,
- _("%s URL:%s %2d %s\n"),
+ logprintf (LOG_NONVERBOSE,
+ _("%s URL: %s %2d %s\n"),
tms, u->url, hstat.statcode,
hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
goto exit;
continue;
} /* send_head_first */
} /* !got_head */
-
- if ((tmr != (time_t) (-1))
+
+ if (opt.useservertimestamps
+ && (tmr != (time_t) (-1))
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- fl = opt.output_document;
- }
- else
- fl = hstat.local_file;
+ set_local_file (&fl, hstat.local_file);
if (fl)
{
time_t newtmr = -1;
&& hstat.remote_time && hstat.remote_time[0])
{
newtmr = http_atotm (hstat.remote_time);
- if (newtmr != -1)
+ if (newtmr != (time_t)-1)
tmr = newtmr;
}
touch (fl, tmr);
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s/%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
number_to_static_string (hstat.contlen),
hstat.local_file, count);
}
- ++opt.numurls;
- total_downloaded_bytes += hstat.len;
+ ++numurls;
+ total_downloaded_bytes += hstat.rd_size;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
- downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
else
- downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
ret = RETROK;
goto exit;
else if (hstat.res == 0) /* No read error */
{
if (hstat.contlen == -1) /* We don't know how much we were supposed
- to get, so assume we succeeded. */
+ to get, so assume we succeeded. */
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
tms, u->url, number_to_static_string (hstat.len),
hstat.local_file, count);
}
- ++opt.numurls;
- total_downloaded_bytes += hstat.len;
+ ++numurls;
+ total_downloaded_bytes += hstat.rd_size;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
- downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
else
- downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
-
+ downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+
ret = RETROK;
goto exit;
}
while (!opt.ntry || (count < opt.ntry));
exit:
- if (ret == RETROK)
+ if (ret == RETROK && local_file)
*local_file = xstrdup (hstat.local_file);
free_hstat (&hstat);
-
+
return ret;
}
\f
Netscape cookie specification.) */
};
const char *oldlocale;
+ char savedlocale[256];
size_t i;
time_t ret = (time_t) -1;
non-English locales, which we work around by temporarily setting
locale to C before invoking strptime. */
oldlocale = setlocale (LC_TIME, NULL);
+ if (oldlocale)
+ {
+ size_t l = strlen (oldlocale) + 1;
+ if (l >= sizeof savedlocale)
+ savedlocale[0] = '\0';
+ else
+ memcpy (savedlocale, oldlocale, l);
+ }
+ else savedlocale[0] = '\0';
+
setlocale (LC_TIME, "C");
for (i = 0; i < countof (time_formats); i++)
}
/* Restore the previous locale. */
- setlocale (LC_TIME, oldlocale);
+ if (savedlocale[0])
+ setlocale (LC_TIME, savedlocale);
return ret;
}
{
int i;
- for (i = 0; i < MD5_HASHLEN; i++, hash++)
+ for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++)
{
*buf++ = XNUM_TO_digit (*hash >> 4);
*buf++ = XNUM_TO_digit (*hash & 0xf);
/* Calculate the digest value. */
{
- ALLOCA_MD5_CONTEXT (ctx);
- unsigned char hash[MD5_HASHLEN];
- char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
- char response_digest[MD5_HASHLEN * 2 + 1];
+ struct md5_ctx ctx;
+ unsigned char hash[MD5_DIGEST_SIZE];
+ char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1];
+ char response_digest[MD5_DIGEST_SIZE * 2 + 1];
/* A1BUF = H(user ":" realm ":" password) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)user, strlen (user), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)user, strlen (user), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (a1buf, hash);
/* A2BUF = H(method ":" path) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)method, strlen (method), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)path, strlen (path), ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)method, strlen (method), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)path, strlen (path), &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (a2buf, hash);
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (response_digest, hash);
res = xmalloc (strlen (user)
+ strlen (realm)
+ strlen (nonce)
+ strlen (path)
- + 2 * MD5_HASHLEN /*strlen (response_digest)*/
+ + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
+ (opaque ? strlen (opaque) : 0)
+ 128);
sprintf (res, "Digest \
if (len == 5)
{
strncpy (shortext, ext, len - 1);
- shortext[len - 2] = '\0';
+ shortext[len - 1] = '\0';
}
if (last_period_in_local_filename == NULL
{
int i;
struct {
- char *hdrval;
- char *opt_dir_prefix;
+ char *hdrval;
char *filename;
bool result;
} test_array[] = {
- { "filename=\"file.ext\"", NULL, "file.ext", true },
- { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
- { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
- { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
- { "attachment", NULL, NULL, false },
- { "attachment", "somedir", NULL, false },
+ { "filename=\"file.ext\"", "file.ext", true },
+ { "attachment; filename=\"file.ext\"", "file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+ { "attachment", NULL, false },
+ { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
+ { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true },
};
-
- for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
+
+ for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
char *filename;
bool res;
- opt.dir_prefix = test_array[i].opt_dir_prefix;
res = parse_content_disposition (test_array[i].hdrval, &filename);
- mu_assert ("test_parse_content_disposition: wrong result",
+ mu_assert ("test_parse_content_disposition: wrong result",
res == test_array[i].result
- && (res == false
+ && (res == false
|| 0 == strcmp (test_array[i].filename, filename)));
}