struct http_stat;
static char *create_authorization_line (const char *, const char *,
const char *, const char *,
- const char *, bool *);
+ const char *, bool *, uerr_t *);
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
static void ensure_extension (struct http_stat *, const char *, int *);
char dlbuf[SKIP_SIZE + 1];
dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
- assert (contlen != -1 || contlen);
-
/* If the body is too large, it makes more sense to simply close the
connection than to try to read the body. */
if (contlen > SKIP_THRESHOLD)
else if(delim1 == delim2)
{
if ((name->e - 1) == delim1)
- {
- result = RFC2231_ENCODING;
- }
+ {
+ result = RFC2231_ENCODING;
+ }
else
- {
- result = RFC2231_NOENCODING;
- }
+ {
+ result = RFC2231_NOENCODING;
+ }
name->e = delim1;
}
else
static void
modify_param_value (param_token *value, int encoding_type )
{
- if (RFC2231_ENCODING == encoding_type)
+ if (encoding_type == RFC2231_ENCODING)
{
const char *delim = memrchr (value->b, '\'', value->e - value->b);
- if ( delim != NULL )
- {
- value->b = (delim+1);
- }
+ if (delim != NULL)
+ {
+ value->b = (delim+1);
+ }
}
}
filename=\"foo bar\"", the first call to this function will return
the token named "attachment" and no value, and the second call will
return the token named "filename" and value "foo bar". The third
- call will return false, indicating no more valid tokens. */
+ call will return false, indicating no more valid tokens.
+
+ is_url_encoded is an out parameter. If it is not NULL, a boolean value is
+ stored into it, telling the caller whether the extracted value is
+ URL-encoded. The caller can then decode the value with url_unescape();
+ note that url_unescape() decodes in place, so it must be given a writable
+ copy of the value. RFC 2231 uses URL-encoding to support non-US-ASCII
+ characters in HTTP header values. */
bool
extract_param (const char **source, param_token *name, param_token *value,
- char separator)
+ char separator, bool *is_url_encoded)
{
const char *p = *source;
+ int param_type;
+ if (is_url_encoded)
+ *is_url_encoded = false; /* initializing the out parameter */
while (c_isspace (*p)) ++p;
if (!*p)
}
*source = p;
- int param_type = modify_param_name(name);
- if (NOT_RFC2231 != param_type)
+ param_type = modify_param_name(name);
+ if (param_type != NOT_RFC2231)
{
+ if (param_type == RFC2231_ENCODING && is_url_encoded)
+ *is_url_encoded = true;
modify_param_value(value, param_type);
}
return true;
/* Appends the string represented by VALUE to FILENAME */
static void
-append_value_to_filename (char **filename, param_token const * const value)
+append_value_to_filename (char **filename, param_token const * const value,
+ bool is_url_encoded)
{
int original_length = strlen(*filename);
int new_length = strlen(*filename) + (value->e - value->b);
*filename = xrealloc (*filename, new_length+1);
- memcpy (*filename + original_length, value->b, (value->e - value->b));
+ memcpy (*filename + original_length, value->b, (value->e - value->b));
(*filename)[new_length] = '\0';
+ if (is_url_encoded)
+ url_unescape (*filename + original_length);
}
#undef MAX
{
param_token name, value;
*filename = NULL;
- while (extract_param (&hdr, &name, &value, ';'))
+ bool is_url_encoded = false;
+ for ( ; extract_param (&hdr, &name, &value, ';', &is_url_encoded);
+ is_url_encoded = false)
{
int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
if ( isFilename && value.b != NULL)
continue;
if (*filename)
- append_value_to_filename (filename, &value);
+ append_value_to_filename (filename, &value, is_url_encoded);
else
- *filename = strdupdelim (value.b, value.e);
+ {
+ *filename = strdupdelim (value.b, value.e);
+ if (is_url_encoded)
+ url_unescape (*filename);
+ }
}
}
url, warc_timestamp_str, warc_request_uuid, warc_ip, type
and statcode will be saved in the headers of the WARC record.
The head parameter contains the HTTP headers of the response.
-
+
If fp is NULL and WARC is enabled, the response body will be
written only to the WARC file. If WARC is disabled and fp
is a file pointer, the data will be written to the file.
If fp is a file pointer and WARC is enabled, the body will
be written to both destinations.
-
+
Returns the error code. */
static int
read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
/* Download the response body and write it to fp.
If we are working on a WARC file, we simultaneously write the
response body to warc_tmp. */
- hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
+ hs->res = fd_read_body (hs->local_file, sock, fp, contlen != -1 ? contlen : 0,
hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
flags, warc_tmp);
if (hs->res >= 0)
return RETRFINISHED;
}
-
+
if (warc_tmp != NULL)
fclose (warc_tmp);
} while (0)
#endif /* def __VMS [else] */
-/* The flags that allow clobbering the file (opening with "wb").
- Defined here to avoid repetition later. #### This will require
- rework. */
-#define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
- || opt.dirstruct || opt.output_document)
-
/* Retrieve a document through HTTP protocol. It recognizes status
code, and correctly handles redirections. It closes the network
socket. If it receives an error from the functions below it, it
char *head;
struct response *resp;
- char hdrval[256];
+ char hdrval[512];
char *message;
/* Declare WARC variables. */
exec_name, quote (relevant->host));
return HOSTERR;
}
+ else if (sock != -1)
+ {
+ sock = -1;
+ }
}
if (sock < 0)
the regular request below. */
proxyauth = NULL;
}
- /* Examples in rfc2817 use the Host header in CONNECT
- requests. I don't see how that gains anything, given
- that the contents of Host would be exactly the same as
- the contents of CONNECT. */
+ request_set_header (connreq, "Host",
+ aprintf ("%s:%d", u->host, u->port),
+ rel_value);
write_error = request_send (connreq, sock, 0);
request_free (connreq);
But if we are writing a WARC file we are: we like to keep everyting. */
if (warc_enabled)
{
- int err;
+ int _err;
type = resp_header_strdup (resp, "Content-Type");
- err = read_response_body (hs, sock, NULL, contlen, 0,
+ _err = read_response_body (hs, sock, NULL, contlen, 0,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
xfree_null (type);
- if (err != RETRFINISHED || hs->res < 0)
+ if (_err != RETRFINISHED || hs->res < 0)
{
CLOSE_INVALIDATE (sock);
request_free (req);
xfree_null (message);
resp_free (resp);
xfree (head);
- return err;
+ return _err;
}
else
CLOSE_FINISH (sock);
}
pconn.authorized = false;
+ uerr_t auth_err = RETROK;
if (!auth_finished && (user && passwd))
{
/* IIS sends multiple copies of WWW-Authenticate, one with
else if (!basic_auth_finished
|| !BEGINS_WITH (www_authenticate, "Basic"))
{
- char *pth;
- pth = url_full_path (u);
- request_set_header (req, "Authorization",
- create_authorization_line (www_authenticate,
- user, passwd,
- request_method (req),
- pth,
- &auth_finished),
- rel_value);
- if (BEGINS_WITH (www_authenticate, "NTLM"))
- ntlm_seen = true;
- else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ char *pth = url_full_path (u);
+ const char *value;
+ uerr_t *auth_stat;
+ auth_stat = xmalloc (sizeof (uerr_t));
+ *auth_stat = RETROK;
+
+ value = create_authorization_line (www_authenticate,
+ user, passwd,
+ request_method (req),
+ pth,
+ &auth_finished,
+ auth_stat);
+
+ auth_err = *auth_stat;
+ if (auth_err == RETROK)
{
- /* Need to register this host as using basic auth,
- * so we automatically send creds next time. */
- register_basic_auth_host (u->host);
+ request_set_header (req, "Authorization", value, rel_value);
+
+ if (BEGINS_WITH (www_authenticate, "NTLM"))
+ ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
+
+ xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ xfree (auth_stat);
+ goto retry_with_auth;
+ }
+ else
+ {
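+ /* Creating the Authorization header failed; auth_err holds the
+ reason and is returned by the cleanup code below.
+ NOTE(review): pth and auth_stat are freed only on the success
+ path above, so they leak on this path. */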
}
- xfree (pth);
- xfree_null (message);
- resp_free (resp);
- xfree (head);
- goto retry_with_auth;
}
else
{
* give up. */
}
}
- logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
request_free (req);
xfree_null (message);
resp_free (resp);
xfree (head);
- return AUTHFAILED;
+ if (auth_err == RETROK)
+ return AUTHFAILED;
+ else
+ return auth_err;
}
else /* statcode != HTTP_STATUS_UNAUTHORIZED */
{
tmp = parse_charset (tmp2);
if (tmp)
set_content_encoding (iri, tmp);
+ xfree_null(tmp);
}
}
}
if (H_20X (statcode))
*dt |= RETROKF;
+ if (statcode == HTTP_STATUS_NO_CONTENT)
+ {
+ /* 204 response has no body (RFC 2616, 4.3) */
+
+ /* In case the caller cares to look... */
+ hs->len = 0;
+ hs->res = 0;
+ hs->restval = 0;
+
+ CLOSE_FINISH (sock);
+ xfree_null (type);
+ xfree (head);
+
+ return RETRFINISHED;
+ }
+
/* Return if redirected. */
if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
{
_("Location: %s%s\n"),
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
-
+
/* In case the caller cares to look... */
hs->len = 0;
hs->res = 0;
But if we are writing a WARC file we are: we like to keep everyting. */
if (warc_enabled)
{
- int err = read_response_body (hs, sock, NULL, contlen, 0,
+ int _err = read_response_body (hs, sock, NULL, contlen, 0,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
- if (err != RETRFINISHED || hs->res < 0)
+ if (_err != RETRFINISHED || hs->res < 0)
{
CLOSE_INVALIDATE (sock);
xfree_null (type);
xfree (head);
- return err;
+ return _err;
}
else
CLOSE_FINISH (sock);
{
case HTTP_STATUS_TEMPORARY_REDIRECT:
return NEWLOCATION_KEEP_POST;
- break;
case HTTP_STATUS_MOVED_PERMANENTLY:
if (opt.method && strcasecmp (opt.method, "post") != 0)
return NEWLOCATION_KEEP_POST;
break;
default:
return NEWLOCATION;
- break;
}
return NEWLOCATION;
}
logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
if (contlen + contrange >= 1024)
logprintf (LOG_VERBOSE, " (%s)",
- human_readable (contlen + contrange));
+ human_readable (contlen + contrange, 10, 1));
if (contrange)
{
if (contlen >= 1024)
logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
number_to_static_string (contlen),
- human_readable (contlen));
+ human_readable (contlen, 10, 1));
else
logprintf (LOG_VERBOSE, _(", %s remaining"),
number_to_static_string (contlen));
}
/* Return if we have no intention of further downloading. */
- if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only
- || (opt.method && strcasecmp (opt.method, "get") != 0))
+ if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
{
/* In case the caller cares to look... */
hs->len = 0;
But if we are writing a WARC file we are: we like to keep everyting. */
if (warc_enabled)
{
- int err = read_response_body (hs, sock, NULL, contlen, 0,
+ int _err = read_response_body (hs, sock, NULL, contlen, 0,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
- if (err != RETRFINISHED || hs->res < 0)
+ if (_err != RETRFINISHED || hs->res < 0)
{
CLOSE_INVALIDATE (sock);
xfree (head);
xfree_null (type);
- return err;
+ return _err;
}
else
CLOSE_FINISH (sock);
}
else if (ALLOW_CLOBBER || count > 0)
{
- if (opt.unlink && file_exists_p (hs->local_file))
- {
- int res = unlink (hs->local_file);
- if (res < 0)
- {
- logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
- strerror (errno));
- CLOSE_INVALIDATE (sock);
- xfree (head);
- xfree_null (type);
- return UNLINKERR;
- }
- }
+ if (opt.unlink && file_exists_p (hs->local_file))
+ {
+ int res = unlink (hs->local_file);
+ if (res < 0)
+ {
+ logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
+ strerror (errno));
+ CLOSE_INVALIDATE (sock);
+ xfree (head);
+ xfree_null (type);
+ return UNLINKERR;
+ }
+ }
#ifdef __VMS
int open_id;
fp = output_stream;
/* Print fetch message, if opt.verbose. */
- if (opt.verbose)
- {
- logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
+ logprintf (LOG_VERBOSE, _("Saving to: %s\n"),
HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
- }
err = read_response_body (hs, sock, fp, contlen, contrange,
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
+ if (!opt.output_document)
+ file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
+ else
+ file_name = xstrdup (opt.output_document);
if (opt.timestamping && (file_exists_p (file_name)
|| opt.content_disposition))
send_head_first = true;
/* Decide whether or not to restart. */
if (force_full_retrieve)
hstat.restval = hstat.len;
+ else if (opt.start_pos >= 0)
+ hstat.restval = opt.start_pos;
else if (opt.always_rest
&& got_name
&& stat (hstat.local_file, &st) == 0
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
quote (hstat.local_file), strerror (errno));
- case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
- case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR:
- case FILEBADFILE:
+ case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case SSLINITFAILED:
+ case CONTNOTSUPPORTED: case VERIFCERTERR: case FILEBADFILE:
+ case UNKNOWNATTR:
/* Fatal errors just return from the function. */
ret = err;
goto exit;
+ case ATTRMISSING:
+ /* A missing attribute in a header is a fatal protocol error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Required attribute missing from Header received.\n"));
+ ret = err;
+ goto exit;
+ case AUTHFAILED:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Username/Password Authentication Failed.\n"));
+ ret = err;
+ goto exit;
case WARC_ERR:
/* A fatal WARC error. */
logputs (LOG_VERBOSE, "\n");
static char *
digest_authentication_encode (const char *au, const char *user,
const char *passwd, const char *method,
- const char *path)
+ const char *path, uerr_t *auth_err)
{
static char *realm, *opaque, *nonce, *qop, *algorithm;
static struct {
param_token name, value;
- realm = opaque = nonce = qop = algorithm = NULL;
+ realm = opaque = nonce = algorithm = qop = NULL;
au += 6; /* skip over `Digest' */
- while (extract_param (&au, &name, &value, ','))
+ while (extract_param (&au, &name, &value, ',', NULL))
{
size_t i;
size_t namelen = name.e - name.b;
if (qop != NULL && strcmp(qop,"auth"))
{
logprintf (LOG_NOTQUIET, _("Unsupported quality of protection '%s'.\n"), qop);
- user = NULL; /* force freeing mem and return */
+ xfree_null (qop); /* force freeing mem and return */
+ qop = NULL;
}
-
- if (algorithm != NULL && strcmp (algorithm,"MD5") && strcmp (algorithm,"MD5-sess"))
+ else if (algorithm != NULL && strcmp (algorithm,"MD5") && strcmp (algorithm,"MD5-sess"))
{
logprintf (LOG_NOTQUIET, _("Unsupported algorithm '%s'.\n"), algorithm);
- user = NULL; /* force freeing mem and return */
+ xfree_null (qop); /* force freeing mem and return */
+ qop = NULL;
}
- if (!realm || !nonce || !user || !passwd || !path || !method)
+ if (!realm || !nonce || !user || !passwd || !path || !method || !qop)
{
xfree_null (realm);
xfree_null (opaque);
xfree_null (nonce);
xfree_null (qop);
xfree_null (algorithm);
+ if (!qop)
+ *auth_err = UNKNOWNATTR;
+ else
+ *auth_err = ATTRMISSING;
return NULL;
}
dump_hash (a1buf, hash);
- if (! strcmp (algorithm, "MD5-sess"))
+ if (algorithm && !strcmp (algorithm, "MD5-sess"))
{
/* A1BUF = H( H(user ":" realm ":" password) ":" nonce ":" cnonce ) */
snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX));
md5_finish_ctx (&ctx, hash);
dump_hash (a2buf, hash);
- if (!strcmp(qop, "auth") || !strcmp (qop, "auth-int"))
+ if (qop && (!strcmp(qop, "auth") || !strcmp (qop, "auth-int")))
{
/* RFC 2617 Digest Access Authentication */
/* generate random hex string */
res = xmalloc (res_size);
- if (!strcmp(qop,"auth"))
+ if (qop && !strcmp (qop, "auth"))
{
res_len = snprintf (res, res_size, "Digest "\
"username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\""\
snprintf(res + res_len, res_size - res_len, ", algorithm=\"%s\"", algorithm);
}
}
+
+ xfree_null (realm);
+ xfree_null (opaque);
+ xfree_null (nonce);
+ xfree_null (qop);
+ xfree_null (algorithm);
+
return res;
}
#endif /* ENABLE_DIGEST */
static char *
create_authorization_line (const char *au, const char *user,
const char *passwd, const char *method,
- const char *path, bool *finished)
+ const char *path, bool *finished, uerr_t *auth_err)
{
/* We are called only with known schemes, so we can dispatch on the
first letter. */
#ifdef ENABLE_DIGEST
case 'D': /* Digest */
*finished = true;
- return digest_authentication_encode (au, user, passwd, method, path);
+ return digest_authentication_encode (au, user, passwd, method, path, auth_err);
#endif
#ifdef ENABLE_NTLM
case 'N': /* NTLM */
#ifdef TESTING
const char *
-test_parse_content_disposition()
+test_parse_content_disposition(void)
{
- int i;
- struct {
- char *hdrval;
- char *filename;
+ unsigned i;
+ static const struct {
+ const char *hdrval;
+ const char *filename;
bool result;
} test_array[] = {
{ "filename=\"file.ext\"", "file.ext", true },
{ "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true },
};
- for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
+ for (i = 0; i < countof(test_array); ++i)
{
char *filename;
bool res;