#else
# include <strings.h>
#endif
-#include <ctype.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_SSL
# include "gen_sslfunc.h"
#endif /* HAVE_SSL */
+#include "cookies.h"
extern char *version_string;
extern int errno;
#endif
#ifndef h_errno
+# ifndef __CYGWIN__
extern int h_errno;
+# endif
#endif
\f
+static int cookies_loaded_p;
#define TEXTHTML_S "text/html"
#define HTTP_ACCEPT "*/*"
char *error; /* textual HTTP error */
int statcode; /* status code */
long dltime; /* time of the download */
+ int no_truncate; /* whether truncating the file is
+ forbidden. */
};
/* Free the elements of hstat X. */
const char *));
static int known_authentication_scheme_p PARAMS ((const char *));
-static time_t http_atotm PARAMS ((char *));
+time_t http_atotm PARAMS ((char *));
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
static SSL_CTX *ssl_ctx = NULL;
SSL *ssl = NULL;
#endif /* HAVE_SSL */
+ struct wget_timer *timer;
+ char *cookies = NULL;
/* Whether this connection will be kept alive after the HTTP request
is done. */
int inhibit_keep_alive;
#ifdef HAVE_SSL
-/* initialize ssl_ctx on first run */
+ /* initialize ssl_ctx on first run */
if (!ssl_ctx)
- init_ssl (&ssl_ctx);
+ {
+ err=init_ssl (&ssl_ctx);
+ if (err != 0)
+ {
+ switch (err)
+ {
+ case SSLERRCTXCREATE:
+ /* this is fatal */
+ logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
+ ssl_printerrors ();
+ return err;
+ case SSLERRCERTFILE:
+ /* try without certfile */
+ logprintf (LOG_NOTQUIET,
+ _("Failed to load certificates from %s\n"),
+ opt.sslcertfile);
+ ssl_printerrors ();
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ case SSLERRCERTKEY:
+ logprintf (LOG_NOTQUIET,
+ _("Failed to get certificate key from %s\n"),
+ opt.sslcertkey);
+ ssl_printerrors ();
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ default:
+ break;
+ }
+ }
+ }
#endif /* HAVE_SSL */
if (!(*dt & HEAD_ONLY))
keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0;
+ if (opt.cookies)
+ cookies = build_cookies_request (u->host, u->port, u->path,
+ u->proto == URLHTTPS);
+
/* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
{
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
err = make_connection (&sock, u->host, u->port);
- switch (err)
+ switch (err)
{
case HOSTERR:
logputs (LOG_VERBOSE, "\n");
/* String of the form :PORT. Used only for non-standard ports. */
port_maybe = NULL;
- if (remport != 80)
+ if (1
+#ifdef HAVE_SSL
+ && remport != (u->proto == URLHTTPS
+ ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
+#else
+ && remport != DEFAULT_HTTP_PORT
+#endif
+ )
{
port_maybe = (char *)alloca (numdigit (remport) + 2);
sprintf (port_maybe, ":%d", remport);
+ (request_keep_alive
? strlen (request_keep_alive) : 0)
+ (referer ? strlen (referer) : 0)
+ + (cookies ? strlen (cookies) : 0)
+ (wwwauth ? strlen (wwwauth) : 0)
+ (proxyauth ? strlen (proxyauth) : 0)
+ (range ? strlen (range) : 0)
User-Agent: %s\r\n\
Host: %s%s\r\n\
Accept: %s\r\n\
-%s%s%s%s%s%s%s\r\n",
+%s%s%s%s%s%s%s%s\r\n",
command, path, useragent, remhost,
port_maybe ? port_maybe : "",
HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
referer ? referer : "",
+ cookies ? cookies : "",
wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "",
range ? range : "",
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
FREE_MAYBE (proxyauth);
+ FREE_MAYBE (cookies);
/* Send the request to server. */
#ifdef HAVE_SSL
if (header_process (hdr, "Last-Modified", header_strdup,
&hs->remote_time))
goto done_header;
+ /* Try getting cookies. */
+ if (opt.cookies)
+ if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u))
+ goto done_header;
/* Try getting www-authentication. */
if (!authenticate_h)
if (header_process (hdr, "WWW-Authenticate", header_strdup,
FREE_MAYBE (type);
type = NULL;
FREEHSTAT (*hs);
- CLOSE_FINISH (sock);
+ CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
+ might be more bytes in the body. */
if (auth_tried_already)
{
/* If we have tried it already, then there is not point
}
if (contrange == -1)
- hs->restval = 0;
+ {
+ /* We did not get a content-range header. This means that the
+ server did not honor our `Range' request. Normally, this
+ means we should reset hs->restval and continue normally. */
+
+ /* However, if `-c' is used, we need to be a bit more careful:
+
+ 1. If `-c' is specified and the file already existed when
+ Wget was started, it would be a bad idea for us to start
+ downloading it from scratch, effectively truncating it. I
+ believe this cannot happen unless `-c' was specified.
+
+ 2. If `-c' is used on a file that is already fully
+ downloaded, we're requesting bytes after the end of file,
+ which can result in the server not honoring `Range'. If this is
+ the case, `Content-Length' will be equal to the length of the
+ file. */
+ if (opt.always_rest)
+ {
+ /* Check for condition #2. */
+ if (hs->restval == contlen)
+ {
+ logputs (LOG_VERBOSE, _("\
+\n The file is already fully retrieved; nothing to do.\n\n"));
+ /* In case the caller inspects. */
+ hs->len = contlen;
+ hs->res = 0;
+ FREE_MAYBE (type);
+ FREE_MAYBE (hs->newloc);
+ FREE_MAYBE (all_headers);
+ CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
+ might be more bytes in the body. */
+ return RETRFINISHED;
+ }
+
+ /* Check for condition #1. */
+ if (hs->no_truncate)
+ {
+ logprintf (LOG_NOTQUIET,
+ _("\
+\n\
+ The server does not support continued download;\n\
+ refusing to truncate `%s'.\n\n"), u->local);
+ return CONTNOTSUPPORTED;
+ }
+
+ /* Fallthrough */
+ }
+
+ hs->restval = 0;
+ }
+
else if (contrange != hs->restval ||
(H_PARTIAL (statcode) && contrange == -1))
{
_("Location: %s%s\n"),
hs->newloc ? hs->newloc : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- CLOSE_FINISH (sock);
+ CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
+ might be more bytes in the body. */
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
return NEWLOCATION;
/* Return if we have no intention of further downloading. */
if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
{
- /* In case someone cares to look... */
+ /* In case the caller cares to look... */
hs->len = 0L;
hs->res = 0;
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
- CLOSE_FINISH (sock);
+ CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
+ might be more bytes in the body. */
return RETRFINISHED;
}
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
- CLOSE_FINISH (sock);
+ CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
+ might be more bytes in the body. */
FREE_MAYBE (all_headers);
return FOPENERR;
}
}
else /* opt.dfp */
{
+ extern int global_download_count;
fp = opt.dfp;
- if (!hs->restval)
+ /* To ensure that repeated "from scratch" downloads work for -O
+ files, we rewind the file pointer, unless restval is
+ non-zero. (This works only when -O is used on regular files,
+ but it's still a valuable feature.)
+
+ However, this loses when more than one URL is specified on
+ the command line: the second rewind eradicates the contents
+ of the first download. Thus we disable the above trick for
+ all the downloads except the very first one.
+
+ #### A possible solution to this would be to remember the
+ file position in the output document and to seek to that
+ position, instead of rewinding. */
+ if (!hs->restval && global_download_count == 0)
{
/* This will silently fail for streams that don't correspond
to regular files, but that's OK. */
rewind (fp);
+ /* ftruncate is needed because opt.dfp is opened in append
+ mode if opt.always_rest is set. */
+ ftruncate (fileno (fp), 0);
clearerr (fp);
}
}
should be some overhead information. */
if (opt.save_headers)
fwrite (all_headers, 1, all_length, fp);
- reset_timer ();
+ timer = wtimer_new ();
/* Get the contents of the document. */
hs->res = get_contents (sock, fp, &hs->len, hs->restval,
(contlen != -1 ? contlen : 0),
&rbuf, keep_alive);
- hs->dltime = elapsed_time ();
+ hs->dltime = wtimer_elapsed (timer);
+ wtimer_delete (timer);
{
/* Close or flush the file. We have to be careful to check for
error here. Checking the result of fwrite() is not enough --
struct http_stat hstat; /* HTTP status */
struct stat st;
+ /* This used to be done in main(), but it's a better idea to do it
+ here so that we don't go through the hoops if we're just using
+ FTP or whatever. */
+ if (opt.cookies && opt.cookies_input && !cookies_loaded_p)
+ load_cookies (opt.cookies_input);
+
*newloc = NULL;
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use
{
/* Would a single s[n]printf() call be faster? --dan
- It wouldn't. sprintf() is horribly slow. At one point I
- profiled Wget, and found that a measurable and
+ Definitely not. sprintf() is horribly slow. It's a
+ different question whether the difference between the two
+ affects a program. Usually I'd say "no", but at one
+ point I profiled Wget, and found that a measurable and
non-negligible amount of time was lost calling sprintf()
in url.c. Replacing sprintf with inline calls to
strcpy() and long_to_string() made a difference.
hstat.restval = 0L;
/* Decide whether or not to restart. */
if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
- && file_exists_p (u->local))
- if (stat (u->local, &st) == 0)
+ && file_exists_p (locf))
+ if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
hstat.restval = st.st_size;
/* Decide whether to send the no-cache directive. */
if (u->proxy && (count > 1 || (opt.proxy_cache == 0)))
else
locf = opt.output_document;
+ /* If `-c' is used, check whether the file we're writing to
+ exists before we've done anything. If so, we'll refuse to
+ truncate it if the server doesn't support continued
+ downloads. */
+ if (opt.always_rest)
+ hstat.no_truncate = file_exists_p (locf);
+
/* Time? */
tms = time_str (NULL);
/* Get the new location (with or without the redirection). */
printwhat (count, opt.ntry);
continue;
break;
- case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
+ case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
+ case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
xfree (filename_plus_orig_suffix); /* must precede every return! */
FREEHSTAT (hstat);
return err;
break;
- case CONSSLERR:
+ case CONSSLERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
strings within it will no longer be used. */
FREEHSTAT (hstat);
- tmrate = rate (hstat.len - hstat.restval, hstat.dltime);
+ tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
if (hstat.len == hstat.contlen)
{
++p;
if (!*p
|| (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
- || ((p[0] == '+' || p[1] == '-') && ISDIGIT (p[1])))
+ || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
return 1;
else
return 0;
Marcus Hennecke's atotm(), which is forgiving, fast, to-the-point,
and does not use strptime(). atotm() is to be found in the sources
of `phttpd', a little-known HTTP server written by Peter Erikson. */
-static time_t
+time_t
http_atotm (char *time_string)
{
struct tm t;
/* RFC1123: Thu, 29 Jan 1998 22:12:57 */
if (check_end (strptime (time_string, "%a, %d %b %Y %T", &t)))
return mktime_from_utc (&t);
- /* RFC850: Thu, 29-Jan-98 22:12:57 */
- if (check_end (strptime (time_string, "%a, %d-%b-%y %T", &t)))
+ /* RFC850: Thursday, 29-Jan-98 22:12:57 */
+ if (check_end (strptime (time_string, "%A, %d-%b-%y %T", &t)))
+ return mktime_from_utc (&t);
+ /* pseudo-RFC850: Thu, 29-Jan-1998 22:12:57
+ (google.com uses this for their cookies.) */
+ if (check_end (strptime (time_string, "%a, %d-%b-%Y %T", &t)))
return mktime_from_utc (&t);
/* asctime: Thu Jan 29 22:12:57 1998 */
if (check_end (strptime (time_string, "%a %b %d %T %Y", &t)))
return 0;
}
-/* Response value needs to be in lowercase, so we cannot use HEXD2ASC
- from url.h. See RFC 2069 2.1.2 for the syntax of response-digest. */
-#define HEXD2asc(x) (((x) < 10) ? ((x) + '0') : ((x) - 10 + 'a'))
-
/* Dump the hexadecimal representation of HASH to BUF. HASH should be
an array of 16 bytes containing the hash keys, and BUF should be a
buffer of 33 writable characters (32 for hex digits plus one for
for (i = 0; i < MD5_HASHLEN; i++, hash++)
{
- *buf++ = HEXD2asc (*hash >> 4);
- *buf++ = HEXD2asc (*hash & 0xf);
+ *buf++ = XDIGIT_TO_xchar (*hash >> 4);
+ *buf++ = XDIGIT_TO_xchar (*hash & 0xf);
}
*buf = '\0';
}