/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#define USE_GNULIB_ALLOC
-
#include "wget.h"
#include <stdio.h>
#include "test.h"
#endif
+#ifdef __VMS
+# include "vms.h"
+#endif /* def __VMS */
+
extern char *version_string;
/* Forward decls. */
+struct http_stat;
static char *create_authorization_line (const char *, const char *,
const char *, const char *,
const char *, bool *);
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
+static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);
#ifndef MIN
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
+#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
#define H_20X(x) (((x) >= 200) && ((x) < 300))
int hcount, hcapacity;
};
+extern int numurls;
+
/* Create a new, empty request. At least request_set_method must be
called before the request can be used. */
{
char *copy;
BOUNDED_TO_ALLOCA(b, e, copy);
- logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint(copy));
+ logprintf (LOG_ALWAYS, "%s%s\n", prefix,
+ quotearg_style (escape_quoting_style, copy));
}
/* Print the server response, line by line, omitting the trailing CRLF
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
+ char *message; /* status message */
wgint rd_size; /* amount of data read from socket */
double dltime; /* time it took to download the data */
const char *referer; /* value of the referer header. */
xfree_null (hs->rderrmsg);
xfree_null (hs->local_file);
xfree_null (hs->orig_file_name);
+ xfree_null (hs->message);
/* Guard against being called twice. */
hs->newloc = NULL;
&& (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
+#ifdef __VMS
#define SET_USER_AGENT(req) do { \
if (!opt.useragent) \
request_set_header (req, "User-Agent", \
- aprintf ("Wget/%s", version_string), rel_value); \
+ aprintf ("Wget/%s (VMS %s %s)", \
+ version_string, vms_arch(), vms_vers()), \
+ rel_value); \
else if (*opt.useragent) \
request_set_header (req, "User-Agent", opt.useragent, rel_none); \
} while (0)
+#else /* def __VMS */
+#define SET_USER_AGENT(req) do { \
+ if (!opt.useragent) \
+ request_set_header (req, "User-Agent", \
+ aprintf ("Wget/%s (%s)", \
+ version_string, OS_TYPE), \
+ rel_value); \
+ else if (*opt.useragent) \
+ request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+} while (0)
+#endif /* def __VMS [else] */
/* The flags that allow clobbering the file (opening with "wb").
Defined here to avoid repetition later. #### This will require
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
-gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+ struct iri *iri)
{
struct request *req;
hs->newloc = NULL;
hs->remote_time = NULL;
hs->error = NULL;
+ hs->message = NULL;
conn = u;
user = user ? user : (opt.http_user ? opt.http_user : opt.user);
passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
- if (user && passwd
- && !u->user) /* We only do "site-wide" authentication with "global"
- user/password values; URL user/password info overrides. */
+ /* We only do "site-wide" authentication with "global" user/password
+ * values unless --auth-no-challange has been requested; URL user/password
+ * info overrides. */
+ if (user && passwd && (!u->user || opt.auth_without_challenge))
{
/* If this is a host for which we've already received a Basic
* challenge, we'll go ahead and send Basic authentication creds. */
basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
}
- proxyauth = NULL;
- if (proxy)
- {
- char *proxy_user, *proxy_passwd;
- /* For normal username and password, URL components override
- command-line/wgetrc parameters. With proxy
- authentication, it's the reverse, because proxy URLs are
- normally the "permanent" ones, so command-line args
- should take precedence. */
- if (opt.proxy_user && opt.proxy_passwd)
- {
- proxy_user = opt.proxy_user;
- proxy_passwd = opt.proxy_passwd;
- }
- else
- {
- proxy_user = proxy->user;
- proxy_passwd = proxy->passwd;
- }
- /* #### This does not appear right. Can't the proxy request,
- say, `Digest' authentication? */
- if (proxy_user && proxy_passwd)
- proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
-
- /* If we're using a proxy, we will be connecting to the proxy
- server. */
- conn = proxy;
-
- /* Proxy authorization over SSL is handled below. */
-#ifdef HAVE_SSL
- if (u->scheme != SCHEME_HTTPS)
-#endif
- request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
- }
-
/* Generate the Host header, HOST:PORT. Take into account that:
- Broken server-side software often doesn't recognize the PORT
without authorization header fails. (Expected to happen at least
for the Digest authorization scheme.) */
+ proxyauth = NULL;
+ if (proxy)
+ {
+ char *proxy_user, *proxy_passwd;
+ /* For normal username and password, URL components override
+ command-line/wgetrc parameters. With proxy
+ authentication, it's the reverse, because proxy URLs are
+ normally the "permanent" ones, so command-line args
+ should take precedence. */
+ if (opt.proxy_user && opt.proxy_passwd)
+ {
+ proxy_user = opt.proxy_user;
+ proxy_passwd = opt.proxy_passwd;
+ }
+ else
+ {
+ proxy_user = proxy->user;
+ proxy_passwd = proxy->passwd;
+ }
+ /* #### This does not appear right. Can't the proxy request,
+ say, `Digest' authentication? */
+ if (proxy_user && proxy_passwd)
+ proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
+
+ /* If we're using a proxy, we will be connecting to the proxy
+ server. */
+ conn = proxy;
+
+ /* Proxy authorization over SSL is handled below. */
+#ifdef HAVE_SSL
+ if (u->scheme != SCHEME_HTTPS)
+#endif
+ request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+ }
+
keep_alive = false;
/* Establish the connection. */
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- escnonprint (pconn.host), pconn.port);
+ quotearg_style (escape_quoting_style, pconn.host),
+ pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
/* If the connection is already authorized, the "Basic"
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
if (statcode != 200)
{
failed_tunnel:
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
- message ? escnonprint (message) : "?");
+ message ? quotearg_style (escape_quoting_style, message) : "?");
xfree_null (message);
return CONSSLERR;
}
if (conn->scheme == SCHEME_HTTPS)
{
- if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
+ if (!ssl_connect_wget (sock) || !ssl_check_certificate (sock, u->host))
{
fd_close (sock);
return CONSSLERR;
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
- message ? escnonprint (message) : "");
+ message ? quotearg_style (escape_quoting_style, message) : "");
else
{
logprintf (LOG_VERBOSE, "\n");
print_server_response (resp, " ");
}
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = true;
+ else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
+ {
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = true;
+ }
+ }
+
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
+
+ if (statcode == HTTP_STATUS_UNAUTHORIZED)
+ {
+ /* Authorization is required. */
+ if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ pconn.authorized = false;
+ if (!auth_finished && (user && passwd))
+ {
+ /* IIS sends multiple copies of WWW-Authenticate, one with
+ the value "negotiate", and other(s) with data. Loop over
+ all the occurrences and pick the one we recognize. */
+ int wapos;
+ const char *wabeg, *waend;
+ char *www_authenticate = NULL;
+ for (wapos = 0;
+ (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
+ &wabeg, &waend)) != -1;
+ ++wapos)
+ if (known_authentication_scheme_p (wabeg, waend))
+ {
+ BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
+ break;
+ }
+
+ if (!www_authenticate)
+ {
+ /* If the authentication header is missing or
+ unrecognized, there's no sense in retrying. */
+ logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+ }
+ else if (!basic_auth_finished
+ || !BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ char *pth;
+ pth = url_full_path (u);
+ request_set_header (req, "Authorization",
+ create_authorization_line (www_authenticate,
+ user, passwd,
+ request_method (req),
+ pth,
+ &auth_finished),
+ rel_value);
+ if (BEGINS_WITH (www_authenticate, "NTLM"))
+ ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
+ xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ goto retry_with_auth;
+ }
+ else
+ {
+ /* We already did Basic auth, and it failed. Gotta
+ * give up. */
+ }
+ }
+ logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
+ request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ return AUTHFAILED;
+ }
+ else /* statcode != HTTP_STATUS_UNAUTHORIZED */
+ {
+ /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
+ if (ntlm_seen)
+ pconn.authorized = true;
+ }
+
/* Determine the local filename if needed. Notice that if -O is used
* hstat.local_file is set by http_loop to the argument of -O. */
if (!hs->local_file)
hs->local_file = url_file_name (u);
}
}
-
+
/* TODO: perform this check only once. */
if (!hs->existence_checked && file_exists_p (hs->local_file))
{
- if (opt.noclobber)
+ if (opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
File %s already there; not retrieving.\n\n"), quote (hs->local_file));
/* If the file is there, we suppose it's retrieved OK. */
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
+ xfree (head);
+ xfree_null (message);
return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
if (opt.timestamping && !hs->timestamp_checked)
{
size_t filename_len = strlen (hs->local_file);
- char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
+ char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
bool local_dot_orig_file_exists = false;
char *local_filename = NULL;
struct_stat st;
--hniksic */
memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
memcpy (filename_plus_orig_suffix + filename_len,
- ".orig", sizeof (".orig"));
+ ORIG_SFX, sizeof (ORIG_SFX));
/* Try to stat() the .orig file. */
if (stat (filename_plus_orig_suffix, &st) == 0)
local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
- }
+ }
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
contlen = parsed;
}
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
- {
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
- }
- }
- if (keep_alive)
- /* The server has promised that it will not close the connection
- when we're done. This means that we can register it. */
- register_persistent (conn->host, conn->port, sock, using_ssl);
-
- if (statcode == HTTP_STATUS_UNAUTHORIZED)
- {
- /* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
- else
- CLOSE_INVALIDATE (sock);
- pconn.authorized = false;
- if (!auth_finished && (user && passwd))
- {
- /* IIS sends multiple copies of WWW-Authenticate, one with
- the value "negotiate", and other(s) with data. Loop over
- all the occurrences and pick the one we recognize. */
- int wapos;
- const char *wabeg, *waend;
- char *www_authenticate = NULL;
- for (wapos = 0;
- (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
- &wabeg, &waend)) != -1;
- ++wapos)
- if (known_authentication_scheme_p (wabeg, waend))
- {
- BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
- break;
- }
-
- if (!www_authenticate)
- {
- /* If the authentication header is missing or
- unrecognized, there's no sense in retrying. */
- logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
- }
- else if (!basic_auth_finished
- || !BEGINS_WITH (www_authenticate, "Basic"))
- {
- char *pth;
- pth = url_full_path (u);
- request_set_header (req, "Authorization",
- create_authorization_line (www_authenticate,
- user, passwd,
- request_method (req),
- pth,
- &auth_finished),
- rel_value);
- if (BEGINS_WITH (www_authenticate, "NTLM"))
- ntlm_seen = true;
- else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
- {
- /* Need to register this host as using basic auth,
- * so we automatically send creds next time. */
- register_basic_auth_host (u->host);
- }
- xfree (pth);
- goto retry_with_auth;
- }
- else
- {
- /* We already did Basic auth, and it failed. Gotta
- * give up. */
- }
- }
- logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
- request_free (req);
- return AUTHFAILED;
- }
- else /* statcode != HTTP_STATUS_UNAUTHORIZED */
- {
- /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
- if (ntlm_seen)
- pconn.authorized = true;
- }
request_free (req);
hs->statcode = statcode;
char *tmp = strchr (type, ';');
if (tmp)
{
+ /* sXXXav: only needed if IRI support is enabled */
+ char *tmp2 = tmp + 1;
+
while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
+
+ /* Try to get remote encoding if needed */
+ if (opt.enable_iri && !opt.encoding_remote)
+ {
+ tmp = parse_charset (tmp2);
+ if (tmp)
+ set_content_encoding (iri, tmp);
+ }
}
}
hs->newloc = resp_header_strdup (resp, "Location");
else
CLOSE_INVALIDATE (sock);
xfree_null (type);
+ xfree (head);
return NEWLOCATION;
}
}
else
*dt &= ~TEXTHTML;
- if (opt.html_extension && (*dt & TEXTHTML))
- /* -E / --html-extension / html_extension = on was specified, and this is a
- text/html file. If some case-insensitive variation on ".htm[l]" isn't
- already the file's suffix, tack on ".html". */
- {
- char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ if (type &&
+ 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
+ *dt |= TEXTCSS;
+ else
+ *dt &= ~TEXTCSS;
- if (last_period_in_local_filename == NULL
- || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
- || 0 == strcasecmp (last_period_in_local_filename, ".html")))
+ if (opt.html_extension)
+ {
+ if (*dt & TEXTHTML)
+ /* -E / --html-extension / html_extension = on was specified,
+ and this is a text/html file. If some case-insensitive
+ variation on ".htm[l]" isn't already the file's suffix,
+ tack on ".html". */
{
- int local_filename_len = strlen (hs->local_file);
- /* Resize the local file, allowing for ".html" preceded by
- optional ".NUMBER". */
- hs->local_file = xrealloc (hs->local_file,
- local_filename_len + 24 + sizeof (".html"));
- strcpy(hs->local_file + local_filename_len, ".html");
- /* If clobbering is not allowed and the file, as named,
- exists, tack on ".NUMBER.html" instead. */
- if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
- {
- int ext_num = 1;
- do
- sprintf (hs->local_file + local_filename_len,
- ".%d.html", ext_num++);
- while (file_exists_p (hs->local_file));
- }
- *dt |= ADDED_HTML_EXTENSION;
+ ensure_extension (hs, ".html", dt);
+ }
+ else if (*dt & TEXTCSS)
+ {
+ ensure_extension (hs, ".css", dt);
}
}
- if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
+ if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
+ || (hs->restval > 0 && statcode == HTTP_STATUS_OK
+ && contrange == 0 && hs->restval >= contlen)
+ )
{
/* If `-c' is in use and the file has been fully downloaded (or
the remote file has shrunk), Wget effectively requests bytes
- after the end of file and the server response with 416. */
+ after the end of file and the server response with 416
+ (or 200 with a <= Content-Length. */
logputs (LOG_VERBOSE, _("\
\n The file is already fully retrieved; nothing to do.\n\n"));
/* In case the caller inspects. */
xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
+ xfree (head);
return RETRUNNEEDED;
}
if ((contrange != 0 && contrange != hs->restval)
Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RANGEERR;
}
if (contlen == -1)
logputs (LOG_VERBOSE,
opt.ignore_length ? _("ignored") : _("unspecified"));
if (type)
- logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
+ logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type));
else
logputs (LOG_VERBOSE, "\n");
}
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RETRFINISHED;
}
+/* 2005-06-17 SMS.
+ For VMS, define common fopen() optional arguments.
+*/
+#ifdef __VMS
+# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+# define FOPEN_BIN_FLAG 3
+#else /* def __VMS */
+# define FOPEN_BIN_FLAG 1
+#endif /* def __VMS [else] */
+
/* Open the local file. */
if (!output_stream)
{
if (opt.backups)
rotate_backups (hs->local_file);
if (hs->restval)
- fp = fopen (hs->local_file, "ab");
+ {
+#ifdef __VMS
+ int open_id;
+
+ open_id = 21;
+ fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+ fp = fopen (hs->local_file, "ab");
+#endif /* def __VMS [else] */
+ }
else if (ALLOW_CLOBBER)
- fp = fopen (hs->local_file, "wb");
+ {
+#ifdef __VMS
+ int open_id;
+
+ open_id = 22;
+ fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+ fp = fopen (hs->local_file, "wb");
+#endif /* def __VMS [else] */
+ }
else
{
fp = fopen_excl (hs->local_file, true);
_("%s has sprung into existence.\n"),
hs->local_file);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPEN_EXCL_ERR;
}
}
{
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPENERR;
}
}
retried, and retried, and retried, and... */
uerr_t
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy)
+ int *dt, struct url *proxy, struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = true;
+ char *file_name;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
-
+
/* Set LOCAL_FILE parameter. */
if (local_file && opt.output_document)
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-
+
/* Reset NEWLOC parameter. */
*newloc = NULL;
/* TODO: Ick! This code is now in both gethttp and http_loop, and is
* screaming for some refactoring. */
- if (got_name && file_exists_p (hstat.local_file) && opt.noclobber)
+ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"),
+File %s already there; not retrieving.\n\n"),
quote (hstat.local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
if (has_html_suffix_p (hstat.local_file))
*dt |= TEXTHTML;
- return RETRUNNEEDED;
+ ret = RETROK;
+ goto exit;
}
/* Reset the counter. */
count = 0;
-
+
/* Reset the document type. */
*dt = 0;
-
+
/* Skip preliminary HEAD request if we're not in spider mode AND
* if -O was given or HTTP Content-Disposition support is disabled. */
if (!opt.spider
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
+ file_name = url_file_name (u);
+ if (opt.timestamping
&& !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ && file_exists_p (file_name))
send_head_first = true;
+ xfree (file_name);
/* THE loop */
do
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
-
+
/* Get the current time string. */
tms = datetime_str (time (NULL));
-
+
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
Spider mode enabled. Check if remote file exists.\n"));
if (opt.verbose)
{
char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-
- if (count > 1)
+
+ if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
tms, tmp, hurl);
}
- else
+ else
{
logprintf (LOG_NOTQUIET, "--%s-- %s\n",
tms, hurl);
}
-
+
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (send_head_first && !got_head)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy);
+ err = gethttp (u, &hstat, dt, proxy, iri);
/* Time? */
tms = datetime_str (time (NULL));
-
+
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
hstat.statcode);
ret = WRONGCODE;
}
- else
+ else
{
ret = NEWLOCATION;
}
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
continue;
}
/* Maybe we should always keep track of broken links, not just in
- * spider mode. */
- else if (opt.spider)
+ * spider mode.
+ * Don't log error if it was UTF-8 encoded because we will try
+ * once unencoded. */
+ else if (opt.spider && !iri->utf8_encode)
{
/* #### Again: ugly ugly ugly! */
- if (!hurl)
+ if (!hurl)
hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
+ tms, hstat.statcode,
+ quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
ret = WRONGCODE;
if (opt.spider)
{
+ bool finished = true;
if (opt.recursive)
{
if (*dt & TEXTHTML)
{
logputs (LOG_VERBOSE, _("\
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ finished = false;
}
else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but does not contain any link -- not retrieving.\n\n"));
ret = RETROK; /* RETRUNNEEDED is not for caller. */
- goto exit;
}
}
else
Remote file exists.\n\n"));
}
ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ }
+
+ if (finished)
+ {
+ logprintf (LOG_NONVERBOSE,
+ _("%s URL: %s %2d %s\n"),
+ tms, u->url, hstat.statcode,
+ hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
goto exit;
}
}
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- fl = opt.output_document;
- }
- else
- fl = hstat.local_file;
+ set_local_file (&fl, hstat.local_file);
if (fl)
{
time_t newtmr = -1;
&& hstat.remote_time && hstat.remote_time[0])
{
newtmr = http_atotm (hstat.remote_time);
- if (newtmr != -1)
+ if (newtmr != (time_t)-1)
tmr = newtmr;
}
touch (fl, tmr);
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s/%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
number_to_static_string (hstat.contlen),
hstat.local_file, count);
}
- ++opt.numurls;
+ ++numurls;
total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
tms, u->url, number_to_static_string (hstat.len),
hstat.local_file, count);
}
- ++opt.numurls;
+ ++numurls;
total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
printwhat (count, opt.ntry);
continue;
}
- else
+ else if (hstat.len != hstat.restval)
/* Getting here would mean reading more data than
requested with content-length, which we never do. */
abort ();
+ else
+ {
+ /* Getting here probably means that the content-length was
+ * _less_ than the original, local size. We should probably
+ * truncate or re-read, or something. FIXME */
+ ret = RETROK;
+ goto exit;
+ }
}
else /* from now on hstat.res can only be -1 */
{
Netscape cookie specification.) */
};
const char *oldlocale;
- int i;
+ char savedlocale[256];
+ size_t i;
time_t ret = (time_t) -1;
/* Solaris strptime fails to recognize English month names in
non-English locales, which we work around by temporarily setting
locale to C before invoking strptime. */
oldlocale = setlocale (LC_TIME, NULL);
+ if (oldlocale)
+ {
+ size_t l = strlen (oldlocale);
+ if (l >= sizeof savedlocale)
+ savedlocale[0] = '\0';
+ else
+ memcpy (savedlocale, oldlocale, l);
+ }
+ else savedlocale[0] = '\0';
+
setlocale (LC_TIME, "C");
for (i = 0; i < countof (time_formats); i++)
}
/* Restore the previous locale. */
- setlocale (LC_TIME, oldlocale);
+ if (savedlocale[0])
+ setlocale (LC_TIME, savedlocale);
return ret;
}
au += 6; /* skip over `Digest' */
while (extract_param (&au, &name, &value, ','))
{
- int i;
+ size_t i;
+ size_t namelen = name.e - name.b;
for (i = 0; i < countof (options); i++)
- if (name.e - name.b == strlen (options[i].name)
- && 0 == strncmp (name.b, options[i].name, name.e - name.b))
+ if (namelen == strlen (options[i].name)
+ && 0 == strncmp (name.b, options[i].name,
+ namelen))
{
*options[i].variable = strdupdelim (value.b, value.e);
break;
first argument and are followed by whitespace or terminating \0.
The comparison is case-insensitive. */
#define STARTS(literal, b, e) \
- ((e) - (b) >= STRSIZE (literal) \
+ ((e > b) \
+ && ((size_t) ((e) - (b))) >= STRSIZE (literal) \
&& 0 == strncasecmp (b, literal, STRSIZE (literal)) \
- && ((e) - (b) == STRSIZE (literal) \
+ && ((size_t) ((e) - (b)) == STRSIZE (literal) \
|| c_isspace (b[STRSIZE (literal)])))
static bool
cookie_jar_delete (wget_cookie_jar);
}
+void
+ensure_extension (struct http_stat *hs, const char *ext, int *dt)
+{
+ char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ char shortext[8];
+ int len = strlen (ext);
+ if (len == 5)
+ {
+ strncpy (shortext, ext, len - 1);
+ shortext[len - 2] = '\0';
+ }
+
+ if (last_period_in_local_filename == NULL
+ || !(0 == strcasecmp (last_period_in_local_filename, shortext)
+ || 0 == strcasecmp (last_period_in_local_filename, ext)))
+ {
+ int local_filename_len = strlen (hs->local_file);
+ /* Resize the local file, allowing for ".html" preceded by
+ optional ".NUMBER". */
+ hs->local_file = xrealloc (hs->local_file,
+ local_filename_len + 24 + len);
+ strcpy (hs->local_file + local_filename_len, ext);
+ /* If clobbering is not allowed and the file, as named,
+ exists, tack on ".NUMBER.html" instead. */
+ if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
+ {
+ int ext_num = 1;
+ do
+ sprintf (hs->local_file + local_filename_len,
+ ".%d%s", ext_num++, ext);
+ while (file_exists_p (hs->local_file));
+ }
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+}
+
#ifdef TESTING