/* HTTP support.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
Free Software Foundation, Inc.
This file is part of GNU Wget.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
# endif
#endif
-#ifdef WINDOWS
-# include <winsock.h>
-#else
-# include <netdb.h> /* for h_errno */
-#endif
-
#include "wget.h"
#include "utils.h"
#include "url.h"
# include "gen_sslfunc.h"
#endif /* HAVE_SSL */
#include "cookies.h"
+#ifdef USE_DIGEST
+# include "gen-md5.h"
+#endif
extern char *version_string;
#ifndef errno
extern int errno;
#endif
-#ifndef h_errno
-# ifndef __CYGWIN__
-extern int h_errno;
-# endif
-#endif
\f
static int cookies_loaded_p;
+struct cookie_jar *wget_cookie_jar;
#define TEXTHTML_S "text/html"
#define HTTP_ACCEPT "*/*"
return statcode;
}
\f
+#define WMIN(x, y) ((x) > (y) ? (y) : (x))
+
+/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
+ PROMISED_SIZE bytes are sent over the wire -- if the file is
+ longer, read only that much; if the file is shorter, pad it with
+ zeros.
+
+ Exactly one of SOCK and SSL selects the transport: when SSL is
+ non-NULL (and HAVE_SSL is defined) data goes through ssl_iwrite,
+ otherwise through iwrite on SOCK.
+
+ If FILE_NAME cannot be opened, nothing is read and all
+ PROMISED_SIZE bytes are sent as zeros.
+
+ The file is read through a static buffer, sizeof (chunk) bytes at
+ a time.
+
+ Returns 0 once PROMISED_SIZE bytes have been written, or -1 as
+ soon as a write returns a negative value. */
+
+static int
+post_file (int sock, void *ssl, const char *file_name, long promised_size)
+{
+ static char chunk[8192];
+ long written = 0;
+ int write_error;
+ FILE *fp;
+
+ /* Only one of SOCK and SSL may be active at the same time: the
+ first assert requires that at least one is usable, the second
+ that they are not both set. */
+ assert (sock > -1 || ssl != NULL);
+ assert (sock == -1 || ssl == NULL);
+
+ DEBUGP (("[writing POST file %s ... ", file_name));
+
+ fp = fopen (file_name, "rb");
+ if (!fp)
+ goto pad;
+ while (written < promised_size)
+ {
+ int towrite;
+ int length = fread (chunk, 1, sizeof (chunk), fp);
+ if (length == 0)
+ break;
+ towrite = WMIN (promised_size - written, length);
+#ifdef HAVE_SSL
+ if (ssl)
+ write_error = ssl_iwrite (ssl, chunk, towrite);
+ else
+#endif
+ write_error = iwrite (sock, chunk, towrite);
+ if (write_error < 0)
+ {
+ fclose (fp);
+ return -1;
+ }
+ written += towrite;
+ }
+ fclose (fp);
+
+ pad:
+ if (written < promised_size)
+ {
+ /* This highly unlikely case can happen if the file has
+ shrunk under us. To uphold the promise that exactly
+ promised_size bytes would be delivered, pad the remaining
+ data with zeros. #### Should we abort instead?
+
+ This branch is also reached, with written still 0, when
+ the fopen above failed and jumped straight to the pad
+ label; in that case the entire body is padding. */
+ DEBUGP (("padding %ld bytes ... ", promised_size - written));
+ memset (chunk, '\0', sizeof (chunk));
+ while (written < promised_size)
+ {
+ int towrite = WMIN (promised_size - written, sizeof (chunk));
+#ifdef HAVE_SSL
+ if (ssl)
+ write_error = ssl_iwrite (ssl, chunk, towrite);
+ else
+#endif
+ write_error = iwrite (sock, chunk, towrite);
+ if (write_error < 0)
+ return -1;
+ written += towrite;
+ }
+ }
+ assert (written == promised_size);
+ DEBUGP (("done]\n"));
+ return 0;
+}
+\f
/* Functions to be used as arguments to header_process(): */
struct http_process_range_closure {
if (!strncasecmp (hdr, "bytes", 5))
{
hdr += 5;
+ /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
+ HTTP spec. */
+ if (*hdr == ':')
+ ++hdr;
hdr += skip_lws (hdr);
if (!*hdr)
return 0;
*flag = 1;
return 1;
}
+
+/* Commit the cookie to the cookie jar.
+
+ HDR is handed unchanged to cookie_jar_process_set_cookie. ARG is
+ cast to a struct url pointer; its host, port and path are passed
+ along with HDR. The global wget_cookie_jar must already have been
+ created (this is asserted). Always returns 1.
+
+ This function is passed to header_process() as the handler for
+ the "Set-Cookie" header. */
+
+int
+http_process_set_cookie (const char *hdr, void *arg)
+{
+ struct url *u = (struct url *)arg;
+
+ /* The jar should have been created by now. */
+ assert (wget_cookie_jar != NULL);
+
+ cookie_jar_process_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
+ hdr);
+ return 1;
+}
+
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
If a previous connection was persistent, it is closed. */
-static void
-register_persistent (const char *host, unsigned short port, int fd
#ifdef HAVE_SSL
- , SSL *ssl
-#endif
- )
+static void
+register_persistent (const char *host, unsigned short port, int fd, SSL *ssl)
+{
+#else
+static void
+register_persistent (const char *host, unsigned short port, int fd)
{
+#endif
if (pc_active_p)
{
if (pc_last_fd == fd)
DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
}
+#ifdef HAVE_SSL
+# define SHUTDOWN_SSL(ssl) do { \
+ if (ssl) \
+ shutdown_ssl (ssl); \
+} while (0)
+#else
+# define SHUTDOWN_SSL(ssl)
+#endif
+
/* Return non-zero if a persistent connection is available for
connecting to HOST:PORT. */
-static int
-persistent_available_p (const char *host, unsigned short port
#ifdef HAVE_SSL
- , int ssl
-#endif
- )
+static int
+persistent_available_p (const char *host, unsigned short port, int ssl)
+{
+#else
+static int
+persistent_available_p (const char *host, unsigned short port)
{
+#endif
int success;
struct address_list *this_host_ip;
let's invalidate the persistent connection before returning
0. */
CLOSE (pc_last_fd);
+#ifdef HAVE_SSL
+ SHUTDOWN_SSL (pc_last_ssl);
+ pc_last_ssl = NULL;
+#endif
invalidate_persistent ();
return 0;
}
return 1;
}
-#ifdef HAVE_SSL
-# define SHUTDOWN_SSL(ssl) do { \
- if (ssl) \
- shutdown_ssl (ssl); \
-} while (0)
-#else
-# define SHUTDOWN_SSL(ssl)
-#endif
-
/* The idea behind these two CLOSE macros is to distinguish between
two cases: one when the job we've been doing is finished, and we
want to close the connection and leave, and two when something is
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
- long dltime; /* time of the download */
+ double dltime; /* time of the download in msecs */
int no_truncate; /* whether truncating the file is
forbidden. */
const char *referer; /* value of the referer header. */
const char *));
static int known_authentication_scheme_p PARAMS ((const char *));
-time_t http_atotm PARAMS ((char *));
+time_t http_atotm PARAMS ((const char *));
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
char *all_headers;
char *port_maybe;
char *request_keep_alive;
- int sock, hcount, num_written, all_length, statcode;
+ int sock, hcount, all_length, statcode;
+ int write_error;
long contlen, contrange;
struct url *conn;
FILE *fp;
#ifdef HAVE_SSL
static SSL_CTX *ssl_ctx = NULL;
SSL *ssl = NULL;
-#endif /* HAVE_SSL */
+#endif
char *cookies = NULL;
/* Whether this connection will be kept alive after the HTTP request
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive;
+ /* Whether we need to print the host header with square brackets around host,
+ e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual
+ "Host: symbolic-name:1234". */
+ int squares_around_host = 0;
+
+ /* Headers sent when using POST. */
+ char *post_content_type, *post_content_length;
+ long post_data_size = 0;
+
#ifdef HAVE_SSL
/* initialize ssl_ctx on first run */
if (!ssl_ctx)
{
- err = init_ssl (&ssl_ctx);
+ uerr_t err = init_ssl (&ssl_ctx);
if (err != 0)
{
switch (err)
keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0;
+ post_content_type = NULL;
+ post_content_length = NULL;
+
/* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
address_list_release (al);
if (sock < 0)
- return errno == ECONNREFUSED ? CONREFUSED : CONERROR;
+ return CONNECT_ERROR (errno);
#ifdef HAVE_SSL
if (conn->scheme == SCHEME_HTTPS)
DEBUGP (("Reusing fd %d.\n", sock));
}
- command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
+ if (*dt & HEAD_ONLY)
+ command = "HEAD";
+ else if (opt.post_file_name || opt.post_data)
+ command = "POST";
+ else
+ command = "GET";
referer = NULL;
if (hs->referer)
}
else
{
+ /* Use the full path, i.e. one that includes the leading
+ slash and the query string, but is independent of proxy
+ setting. */
+ char *pth = url_full_path (u);
wwwauth = create_authorization_line (authenticate_h, user, passwd,
- command, u->path);
+ command, pth);
+ xfree (pth);
}
}
request_keep_alive = NULL;
if (opt.cookies)
- cookies = build_cookies_request (u->host, u->port, u->path,
+ cookies = cookie_jar_generate_cookie_header (wget_cookie_jar, u->host,
+ u->port, u->path,
#ifdef HAVE_SSL
- u->scheme == SCHEME_HTTPS
+ u->scheme == SCHEME_HTTPS
#else
- 0
+ 0
#endif
- );
+ );
+
+ if (opt.post_data || opt.post_file_name)
+ {
+ post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n";
+ if (opt.post_data)
+ post_data_size = strlen (opt.post_data);
+ else
+ {
+ post_data_size = file_size (opt.post_file_name);
+ if (post_data_size == -1)
+ {
+ logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
+ opt.post_file_name);
+ post_data_size = 0;
+ }
+ }
+ post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1);
+ sprintf (post_content_length,
+ "Content-Length: %ld\r\n", post_data_size);
+ }
if (proxy)
full_path = xstrdup (u->url);
else
+ /* Use the full path, i.e. one that includes the leading slash and
+ the query string. E.g. if u->path is "foo/bar" and u->query is
+ "param=value", full_path will be "/foo/bar?param=value". */
full_path = url_full_path (u);
+ if (strchr (u->host, ':'))
+ squares_around_host = 1;
+
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command)
+ strlen (full_path)
+ (proxyauth ? strlen (proxyauth) : 0)
+ (range ? strlen (range) : 0)
+ strlen (pragma_h)
+ + (post_content_type
+ ? strlen (post_content_type) : 0)
+ + (post_content_length
+ ? strlen (post_content_length) : 0)
+ (opt.user_header ? strlen (opt.user_header) : 0)
+ 64);
/* Construct the request. */
sprintf (request, "\
%s %s HTTP/1.0\r\n\
User-Agent: %s\r\n\
-Host: %s%s\r\n\
+Host: %s%s%s%s\r\n\
Accept: %s\r\n\
-%s%s%s%s%s%s%s%s\r\n",
+%s%s%s%s%s%s%s%s%s%s\r\n",
command, full_path,
- useragent, u->host,
+ useragent,
+ squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "",
port_maybe ? port_maybe : "",
HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "",
range ? range : "",
- pragma_h,
+ pragma_h,
+ post_content_type ? post_content_type : "",
+ post_content_length ? post_content_length : "",
opt.user_header ? opt.user_header : "");
- DEBUGP (("---request begin---\n%s---request end---\n", request));
+ DEBUGP (("---request begin---\n%s", request));
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
/* Send the request to server. */
#ifdef HAVE_SSL
- if (u->scheme == SCHEME_HTTPS)
- num_written = ssl_iwrite (ssl, request, strlen (request));
+ if (conn->scheme == SCHEME_HTTPS)
+ write_error = ssl_iwrite (ssl, request, strlen (request));
else
-#endif /* HAVE_SSL */
- num_written = iwrite (sock, request, strlen (request));
+#endif
+ write_error = iwrite (sock, request, strlen (request));
+
+ if (write_error >= 0)
+ {
+ if (opt.post_data)
+ {
+ DEBUGP (("[POST data: %s]\n", opt.post_data));
+#ifdef HAVE_SSL
+ if (conn->scheme == SCHEME_HTTPS)
+ write_error = ssl_iwrite (ssl, opt.post_data, post_data_size);
+ else
+#endif
+ write_error = iwrite (sock, opt.post_data, post_data_size);
+ }
+ else if (opt.post_file_name)
+ {
+#ifdef HAVE_SSL
+ if (conn->scheme == SCHEME_HTTPS)
+ write_error = post_file (-1, ssl, opt.post_file_name,
+ post_data_size);
+ else
+#endif
+ write_error = post_file (sock, NULL, opt.post_file_name,
+ post_data_size);
+ }
+ }
+ DEBUGP (("---request end---\n"));
- if (num_written < 0)
+ if (write_error < 0)
{
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
strerror (errno));
/* Before reading anything, initialize the rbuf. */
rbuf_initialize (&rbuf, sock);
#ifdef HAVE_SSL
- if (u->scheme == SCHEME_HTTPS)
+ if (conn->scheme == SCHEME_HTTPS)
rbuf.ssl = ssl;
else
rbuf.ssl = NULL;
goto done_header;
/* Try getting cookies. */
if (opt.cookies)
- if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u))
+ if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u))
goto done_header;
/* Try getting www-authentication. */
if (!authenticate_h)
}
}
- if (type && !strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)))
+ /* If content-type is not given, assume text/html. This is because
+ of the multitude of broken CGI's that "forget" to generate the
+ content-type. */
+ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)))
*dt |= TEXTHTML;
else
- /* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
if (opt.html_extension && (*dt & TEXTHTML))
/* In case the caller inspects. */
hs->len = contlen;
hs->res = 0;
+ /* Mark as successfully retrieved. */
+ *dt |= RETROKF;
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
#### A possible solution to this would be to remember the
file position in the output document and to seek to that
- position, instead of rewinding. */
- if (!hs->restval && global_download_count == 0)
+ position, instead of rewinding.
+
+ We don't truncate stdout, since that breaks
+ "wget -O - [...] >> foo".
+ */
+ if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
{
/* This will silently fail for streams that don't correspond
to regular files, but that's OK. */
(contlen != -1 ? contlen : 0),
&rbuf, keep_alive, &hs->dltime);
+ if (hs->res >= 0)
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+
{
/* Close or flush the file. We have to be careful to check for
error here. Checking the result of fwrite() is not enough --
hs->res = -2;
}
FREE_MAYBE (all_headers);
- CLOSE_FINISH (sock);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
int use_ts, got_head = 0; /* time-stamping info */
char *filename_plus_orig_suffix;
char *local_filename = NULL;
- char *tms, *suf, *locf, *tmrate;
+ char *tms, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
/* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
FTP or whatever. */
- if (opt.cookies && opt.cookies_input && !cookies_loaded_p)
+ if (opt.cookies)
{
- load_cookies (opt.cookies_input);
- cookies_loaded_p = 1;
+ if (!wget_cookie_jar)
+ wget_cookie_jar = cookie_jar_new ();
+ if (opt.cookies_input && !cookies_loaded_p)
+ {
+ cookie_jar_load (wget_cookie_jar, opt.cookies_input);
+ cookies_loaded_p = 1;
+ }
}
*newloc = NULL;
hstat.local_file = local_file;
else if (local_file)
{
- *local_file = url_filename (u);
+ *local_file = url_file_name (u);
hstat.local_file = local_file;
}
else
{
- dummy = url_filename (u);
+ dummy = url_file_name (u);
hstat.local_file = &dummy;
}
*dt |= RETROKF;
/* #### Bogusness alert. */
- /* If its suffix is "html" or "htm", assume text/html. */
- if (((suf = suffix (*hstat.local_file)) != NULL)
- && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (*hstat.local_file))
*dt |= TEXTHTML;
FREE_MAYBE (dummy);
{
use_ts = 1;
tml = st.st_mtime;
+#ifdef WINDOWS
+ /* Modification time granularity is 2 seconds for Windows, so
+ increase local time by 1 second for later comparison. */
+ tml++;
+#endif
local_size = st.st_size;
got_head = 0;
}
it is not assigned to the FSF. So I stuck it with strptime. */
time_t
-http_atotm (char *time_string)
+http_atotm (const char *time_string)
{
/* NOTE: Solaris strptime man page claims that %n and %t match white
space, but that's not universally available. Instead, we simply
/* Take the line apart to find the challenge, and compose a digest
authorization header. See RFC2069 section 2.1.2. */
-char *
+static char *
digest_authentication_encode (const char *au, const char *user,
const char *passwd, const char *method,
const char *path)
/* Calculate the digest value. */
{
- MD5_CONTEXT_TYPE ctx;
+ ALLOCA_MD5_CONTEXT (ctx);
unsigned char hash[MD5_HASHLEN];
unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
unsigned char response_digest[MD5_HASHLEN * 2 + 1];
/* A1BUF = H(user ":" realm ":" password) */
- MD5_INIT (&ctx);
- MD5_UPDATE (user, strlen (user), &ctx);
- MD5_UPDATE (":", 1, &ctx);
- MD5_UPDATE (realm, strlen (realm), &ctx);
- MD5_UPDATE (":", 1, &ctx);
- MD5_UPDATE (passwd, strlen (passwd), &ctx);
- MD5_FINISH (&ctx, hash);
+ gen_md5_init (ctx);
+ gen_md5_update ((unsigned char *)user, strlen (user), ctx);
+ gen_md5_update ((unsigned char *)":", 1, ctx);
+ gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
+ gen_md5_update ((unsigned char *)":", 1, ctx);
+ gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
+ gen_md5_finish (ctx, hash);
dump_hash (a1buf, hash);
/* A2BUF = H(method ":" path) */
- MD5_INIT (&ctx);
- MD5_UPDATE (method, strlen (method), &ctx);
- MD5_UPDATE (":", 1, &ctx);
- MD5_UPDATE (path, strlen (path), &ctx);
- MD5_FINISH (&ctx, hash);
+ gen_md5_init (ctx);
+ gen_md5_update ((unsigned char *)method, strlen (method), ctx);
+ gen_md5_update ((unsigned char *)":", 1, ctx);
+ gen_md5_update ((unsigned char *)path, strlen (path), ctx);
+ gen_md5_finish (ctx, hash);
dump_hash (a2buf, hash);
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
- MD5_INIT (&ctx);
- MD5_UPDATE (a1buf, MD5_HASHLEN * 2, &ctx);
- MD5_UPDATE (":", 1, &ctx);
- MD5_UPDATE (nonce, strlen (nonce), &ctx);
- MD5_UPDATE (":", 1, &ctx);
- MD5_UPDATE (a2buf, MD5_HASHLEN * 2, &ctx);
- MD5_FINISH (&ctx, hash);
+ gen_md5_init (ctx);
+ gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
+ gen_md5_update ((unsigned char *)":", 1, ctx);
+ gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
+ gen_md5_update ((unsigned char *)":", 1, ctx);
+ gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
+ gen_md5_finish (ctx, hash);
dump_hash (response_digest, hash);
res = (char*) xmalloc (strlen (user)