/* HTTP support.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
Free Software Foundation, Inc.
This file is part of GNU Wget.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
# endif
#endif
-#ifdef WINDOWS
-# include <winsock.h>
-#else
-# include <netdb.h> /* for h_errno */
-#endif
-
#include "wget.h"
#include "utils.h"
#include "url.h"
#include "retr.h"
#include "headers.h"
#include "connect.h"
-#include "fnmatch.h"
#include "netrc.h"
#ifdef HAVE_SSL
# include "gen_sslfunc.h"
#ifdef USE_DIGEST
# include "gen-md5.h"
#endif
+#include "convert.h"
extern char *version_string;
#ifndef errno
extern int errno;
#endif
-#ifndef h_errno
-# ifndef __CYGWIN__
-extern int h_errno;
-# endif
-#endif
\f
static int cookies_loaded_p;
+struct cookie_jar *wget_cookie_jar;
#define TEXTHTML_S "text/html"
+#define TEXTXHTML_S "application/xhtml+xml"
#define HTTP_ACCEPT "*/*"
/* Some status code validation macros: */
return statcode;
}
\f
+#define WMIN(x, y) ((x) > (y) ? (y) : (x))
+
+/* Send the contents of FILE_NAME to SOCK/SSL.  Exactly PROMISED_SIZE
+   bytes are sent over the wire: if the file is longer, only the first
+   PROMISED_SIZE bytes are read and sent; if the file is shorter (or
+   cannot be opened or read), an error is reported.
+
+   Exactly one of SOCK (a descriptor greater than -1) and SSL (a
+   non-NULL handle) must be supplied; the other must be -1 or NULL.
+
+   Returns 0 when PROMISED_SIZE bytes were written, -1 otherwise.  On
+   failure errno is left describing the first failure so that the
+   caller can report it with strerror: the fopen error, the write
+   error, the read error, or EINVAL when the file was simply shorter
+   than promised.  */
+
+static int
+post_file (int sock, void *ssl, const char *file_name, long promised_size)
+{
+  static char chunk[8192];
+  long written = 0;
+  int write_error;
+  int read_errno = 0;		/* errno of a failed fread, 0 if none */
+  FILE *fp;
+
+  /* Only one of SOCK and SSL may be active at the same time. */
+  assert (sock > -1 || ssl != NULL);
+  assert (sock == -1 || ssl == NULL);
+
+  DEBUGP (("[writing POST file %s ... ", file_name));
+
+  fp = fopen (file_name, "rb");
+  if (!fp)
+    return -1;
+  while (!feof (fp) && written < promised_size)
+    {
+      int towrite;
+      int length;
+
+      /* Clear errno so that a stale value is not mistaken for the
+         cause of a read failure below. */
+      errno = 0;
+      length = fread (chunk, 1, sizeof (chunk), fp);
+      if (length == 0)
+        {
+          /* Distinguish a genuine read error from plain end-of-file. */
+          if (ferror (fp))
+            read_errno = errno;
+          break;
+        }
+      /* Never send more than was promised in Content-Length. */
+      towrite = WMIN (promised_size - written, length);
+#ifdef HAVE_SSL
+      if (ssl)
+        write_error = ssl_iwrite (ssl, chunk, towrite);
+      else
+#endif
+        write_error = iwrite (sock, chunk, towrite);
+      if (write_error < 0)
+        {
+          /* fclose may modify errno; keep the write error intact for
+             the caller's diagnostic. */
+          int write_errno = errno;
+          fclose (fp);
+          errno = write_errno;
+          return -1;
+        }
+      written += towrite;
+    }
+  fclose (fp);
+
+  /* If we've written less than was promised, report an error rather
+     than break the promise.  Prefer the read error when there was
+     one; otherwise the file was too short and EINVAL is the closest
+     (if imperfect) description. */
+  if (written < promised_size)
+    {
+      errno = read_errno != 0 ? read_errno : EINVAL;
+      return -1;
+    }
+
+  assert (written == promised_size);
+  DEBUGP (("done]\n"));
+  return 0;
+}
+\f
/* Functions to be used as arguments to header_process(): */
struct http_process_range_closure {
*flag = 1;
return 1;
}
+
+/* header_process() callback for "Set-Cookie": store the cookie
+   described by the header value HDR in the global cookie jar.  ARG
+   is the struct url of the request, whose host, port and path are
+   passed along with the header.  Always returns 1.  */
+
+int
+http_process_set_cookie (const char *hdr, void *arg)
+{
+  struct url *request_url = arg;
+
+  /* By this point the jar must already exist. */
+  assert (wget_cookie_jar != NULL);
+
+  cookie_jar_process_set_cookie (wget_cookie_jar,
+                                 request_url->host,
+                                 request_url->port,
+                                 request_url->path,
+                                 hdr);
+
+  return 1;
+}
+
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
If a previous connection was persistent, it is closed. */
-static void
-register_persistent (const char *host, unsigned short port, int fd
#ifdef HAVE_SSL
- , SSL *ssl
-#endif
- )
+static void
+register_persistent (const char *host, unsigned short port, int fd, SSL *ssl)
{
+#else
+static void
+register_persistent (const char *host, unsigned short port, int fd)
+{
+#endif
if (pc_active_p)
{
if (pc_last_fd == fd)
/* Return non-zero if a persistent connection is available for
connecting to HOST:PORT. */
-static int
-persistent_available_p (const char *host, unsigned short port
#ifdef HAVE_SSL
- , int ssl
-#endif
- )
+static int
+persistent_available_p (const char *host, unsigned short port, int ssl)
+{
+#else
+static int
+persistent_available_p (const char *host, unsigned short port)
{
+#endif
int success;
struct address_list *this_host_ip;
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
- long dltime; /* time of the download */
+ double dltime; /* time of the download in msecs */
int no_truncate; /* whether truncating the file is
forbidden. */
const char *referer; /* value of the referer header. */
const char *));
static int known_authentication_scheme_p PARAMS ((const char *));
-time_t http_atotm PARAMS ((char *));
+time_t http_atotm PARAMS ((const char *));
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
char *all_headers;
char *port_maybe;
char *request_keep_alive;
- int sock, hcount, num_written, all_length, statcode;
+ int sock, hcount, all_length, statcode;
+ int write_error;
long contlen, contrange;
struct url *conn;
FILE *fp;
#ifdef HAVE_SSL
static SSL_CTX *ssl_ctx = NULL;
SSL *ssl = NULL;
-#endif /* HAVE_SSL */
+#endif
char *cookies = NULL;
/* Whether this connection will be kept alive after the HTTP request
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive;
+ /* Whether we need to print the Host header with square brackets
+ around the host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of
+ the usual "Host: symbolic-name:1234". */
+ int squares_around_host = 0;
+
+ /* Headers sent when using POST. */
+ char *post_content_type, *post_content_length;
+ long post_data_size = 0;
+
#ifdef HAVE_SSL
/* initialize ssl_ctx on first run */
if (!ssl_ctx)
keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0;
+ post_content_type = NULL;
+ post_content_length = NULL;
+
/* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
address_list_release (al);
if (sock < 0)
- return errno == ECONNREFUSED ? CONREFUSED : CONERROR;
+ return CONNECT_ERROR (errno);
#ifdef HAVE_SSL
if (conn->scheme == SCHEME_HTTPS)
DEBUGP (("Reusing fd %d.\n", sock));
}
- command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
+ if (*dt & HEAD_ONLY)
+ command = "HEAD";
+ else if (opt.post_file_name || opt.post_data)
+ command = "POST";
+ else
+ command = "GET";
referer = NULL;
if (hs->referer)
}
else
{
+ /* Use the full path, i.e. one that includes the leading
+ slash and the query string, but is independent of proxy
+ setting. */
+ char *pth = url_full_path (u);
wwwauth = create_authorization_line (authenticate_h, user, passwd,
- command, u->path);
+ command, pth);
+ xfree (pth);
}
}
request_keep_alive = NULL;
if (opt.cookies)
- cookies = build_cookies_request (u->host, u->port, u->path,
+ cookies = cookie_jar_generate_cookie_header (wget_cookie_jar, u->host,
+ u->port, u->path,
#ifdef HAVE_SSL
- u->scheme == SCHEME_HTTPS
+ u->scheme == SCHEME_HTTPS
#else
- 0
+ 0
#endif
- );
+ );
+
+ if (opt.post_data || opt.post_file_name)
+ {
+ post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n";
+ if (opt.post_data)
+ post_data_size = strlen (opt.post_data);
+ else
+ {
+ post_data_size = file_size (opt.post_file_name);
+ if (post_data_size == -1)
+ {
+ logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
+ opt.post_file_name);
+ post_data_size = 0;
+ }
+ }
+ post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1);
+ sprintf (post_content_length,
+ "Content-Length: %ld\r\n", post_data_size);
+ }
if (proxy)
full_path = xstrdup (u->url);
else
+ /* Use the full path, i.e. one that includes the leading slash and
+ the query string. E.g. if u->path is "foo/bar" and u->query is
+ "param=value", full_path will be "/foo/bar?param=value". */
full_path = url_full_path (u);
+ if (strchr (u->host, ':'))
+ squares_around_host = 1;
+
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command)
+ strlen (full_path)
+ (proxyauth ? strlen (proxyauth) : 0)
+ (range ? strlen (range) : 0)
+ strlen (pragma_h)
+ + (post_content_type
+ ? strlen (post_content_type) : 0)
+ + (post_content_length
+ ? strlen (post_content_length) : 0)
+ (opt.user_header ? strlen (opt.user_header) : 0)
+ 64);
/* Construct the request. */
sprintf (request, "\
%s %s HTTP/1.0\r\n\
User-Agent: %s\r\n\
-Host: %s%s\r\n\
+Host: %s%s%s%s\r\n\
Accept: %s\r\n\
-%s%s%s%s%s%s%s%s\r\n",
+%s%s%s%s%s%s%s%s%s%s\r\n",
command, full_path,
- useragent, u->host,
+ useragent,
+ squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "",
port_maybe ? port_maybe : "",
HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "",
range ? range : "",
- pragma_h,
+ pragma_h,
+ post_content_type ? post_content_type : "",
+ post_content_length ? post_content_length : "",
opt.user_header ? opt.user_header : "");
- DEBUGP (("---request begin---\n%s---request end---\n", request));
+ DEBUGP (("---request begin---\n%s", request));
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
/* Send the request to server. */
#ifdef HAVE_SSL
- if (u->scheme == SCHEME_HTTPS)
- num_written = ssl_iwrite (ssl, request, strlen (request));
+ if (conn->scheme == SCHEME_HTTPS)
+ write_error = ssl_iwrite (ssl, request, strlen (request));
else
-#endif /* HAVE_SSL */
- num_written = iwrite (sock, request, strlen (request));
+#endif
+ write_error = iwrite (sock, request, strlen (request));
- if (num_written < 0)
+ if (write_error >= 0)
+ {
+ if (opt.post_data)
+ {
+ DEBUGP (("[POST data: %s]\n", opt.post_data));
+#ifdef HAVE_SSL
+ if (conn->scheme == SCHEME_HTTPS)
+ write_error = ssl_iwrite (ssl, opt.post_data, post_data_size);
+ else
+#endif
+ write_error = iwrite (sock, opt.post_data, post_data_size);
+ }
+ else if (opt.post_file_name && post_data_size != 0)
+ {
+#ifdef HAVE_SSL
+ if (conn->scheme == SCHEME_HTTPS)
+ write_error = post_file (-1, ssl, opt.post_file_name,
+ post_data_size);
+ else
+#endif
+ write_error = post_file (sock, NULL, opt.post_file_name,
+ post_data_size);
+ }
+ }
+ DEBUGP (("---request end---\n"));
+
+ if (write_error < 0)
{
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
strerror (errno));
/* Before reading anything, initialize the rbuf. */
rbuf_initialize (&rbuf, sock);
#ifdef HAVE_SSL
- if (u->scheme == SCHEME_HTTPS)
+ if (conn->scheme == SCHEME_HTTPS)
rbuf.ssl = ssl;
else
rbuf.ssl = NULL;
goto done_header;
/* Try getting cookies. */
if (opt.cookies)
- if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u))
+ if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u))
goto done_header;
/* Try getting www-authentication. */
if (!authenticate_h)
}
}
- if (type && !strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)))
+ /* If content-type is not given, assume text/html. This is because
+ of the multitude of broken CGIs that "forget" to generate the
+ content-type. */
+ if (!type ||
+ 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
- /* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
if (opt.html_extension && (*dt & TEXTHTML))
/* In case the caller inspects. */
hs->len = contlen;
hs->res = 0;
+ /* Mark as successfully retrieved. */
+ *dt |= RETROKF;
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
#### A possible solution to this would be to remember the
file position in the output document and to seek to that
- position, instead of rewinding. */
- if (!hs->restval && global_download_count == 0)
+ position, instead of rewinding.
+
+ We don't truncate stdout, since that breaks
+ "wget -O - [...] >> foo".
+ */
+ if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
{
/* This will silently fail for streams that don't correspond
to regular files, but that's OK. */
(contlen != -1 ? contlen : 0),
&rbuf, keep_alive, &hs->dltime);
+ if (hs->res >= 0)
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+
{
/* Close or flush the file. We have to be careful to check for
error here. Checking the result of fwrite() is not enough --
hs->res = -2;
}
FREE_MAYBE (all_headers);
- CLOSE_FINISH (sock);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
int use_ts, got_head = 0; /* time-stamping info */
char *filename_plus_orig_suffix;
char *local_filename = NULL;
- char *tms, *suf, *locf, *tmrate;
+ char *tms, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
/* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
FTP or whatever. */
- if (opt.cookies && opt.cookies_input && !cookies_loaded_p)
+ if (opt.cookies)
{
- load_cookies (opt.cookies_input);
- cookies_loaded_p = 1;
+ if (!wget_cookie_jar)
+ wget_cookie_jar = cookie_jar_new ();
+ if (opt.cookies_input && !cookies_loaded_p)
+ {
+ cookie_jar_load (wget_cookie_jar, opt.cookies_input);
+ cookies_loaded_p = 1;
+ }
}
*newloc = NULL;
hstat.local_file = local_file;
else if (local_file)
{
- *local_file = url_filename (u);
+ *local_file = url_file_name (u);
hstat.local_file = local_file;
}
else
{
- dummy = url_filename (u);
+ dummy = url_file_name (u);
hstat.local_file = &dummy;
}
*dt |= RETROKF;
/* #### Bogusness alert. */
- /* If its suffix is "html" or "htm", assume text/html. */
- if (((suf = suffix (*hstat.local_file)) != NULL)
- && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (*hstat.local_file))
*dt |= TEXTHTML;
FREE_MAYBE (dummy);
{
use_ts = 1;
tml = st.st_mtime;
+#ifdef WINDOWS
+ /* Modification time granularity is 2 seconds for Windows, so
+ increase local time by 1 second for later comparison. */
+ tml++;
+#endif
local_size = st.st_size;
got_head = 0;
}
it is not assigned to the FSF. So I stuck it with strptime. */
time_t
-http_atotm (char *time_string)
+http_atotm (const char *time_string)
{
/* NOTE: Solaris strptime man page claims that %n and %t match white
space, but that's not universally available. Instead, we simply
GNU strptime does not have this problem because it recognizes
both international and local dates. */
- for (i = 0; i < ARRAY_SIZE (time_formats); i++)
+ for (i = 0; i < countof (time_formats); i++)
if (check_end (strptime (time_string, time_formats[i], &t)))
return mktime_from_utc (&t);
for (i = 0; i < MD5_HASHLEN; i++, hash++)
{
- *buf++ = XDIGIT_TO_xchar (*hash >> 4);
- *buf++ = XDIGIT_TO_xchar (*hash & 0xf);
+ *buf++ = XNUM_TO_digit (*hash >> 4);
+ *buf++ = XNUM_TO_digit (*hash & 0xf);
}
*buf = '\0';
}
/* Take the line apart to find the challenge, and compose a digest
authorization header. See RFC2069 section 2.1.2. */
-char *
+static char *
digest_authentication_encode (const char *au, const char *user,
const char *passwd, const char *method,
const char *path)
int i;
au += skip_lws (au);
- for (i = 0; i < ARRAY_SIZE (options); i++)
+ for (i = 0; i < countof (options); i++)
{
int skip = extract_header_attr (au, options[i].name,
options[i].variable);
break;
}
}
- if (i == ARRAY_SIZE (options))
+ if (i == countof (options))
{
while (*au && *au != '=')
au++;