/* HTTP support.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ (at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
# include <time.h>
# endif
#endif
+#ifndef errno
+extern int errno;
+#endif
#include "wget.h"
#include "utils.h"
#include "url.h"
#include "host.h"
-#include "rbuf.h"
#include "retr.h"
-#include "headers.h"
#include "connect.h"
-#include "fnmatch.h"
#include "netrc.h"
#ifdef HAVE_SSL
# include "gen_sslfunc.h"
#ifdef USE_DIGEST
# include "gen-md5.h"
#endif
+#include "convert.h"
extern char *version_string;
+extern LARGE_INT total_downloaded_bytes;
-#ifndef errno
-extern int errno;
+#ifndef MIN
+# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
+
\f
static int cookies_loaded_p;
+struct cookie_jar *wget_cookie_jar;
#define TEXTHTML_S "text/html"
+#define TEXTXHTML_S "application/xhtml+xml"
#define HTTP_ACCEPT "*/*"
/* Some status code validation macros: */
#define H_20X(x) (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
-#define H_REDIRECTED(x) (((x) == HTTP_STATUS_MOVED_PERMANENTLY) \
- || ((x) == HTTP_STATUS_MOVED_TEMPORARILY))
+#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
+ || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
+ || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
/* HTTP/1.0 status codes from RFC1945, provided for reference. */
/* Successful 2xx. */
#define HTTP_STATUS_MOVED_PERMANENTLY 301
#define HTTP_STATUS_MOVED_TEMPORARILY 302
#define HTTP_STATUS_NOT_MODIFIED 304
+#define HTTP_STATUS_TEMPORARY_REDIRECT 307
/* Client error 4xx. */
#define HTTP_STATUS_BAD_REQUEST 400
#define HTTP_STATUS_BAD_GATEWAY 502
#define HTTP_STATUS_UNAVAILABLE 503
-\f
+/* Terminator callback that fd_read_http_head passes to fd_read_hunk.
+   The scan covers HUNK up to HUNK + OLDLEN + PEEKLEN; judging by the
+   parameter names, the first OLDLEN bytes were seen by earlier calls
+   and the last PEEKLEN bytes are newly peeked.  Returns a pointer
+   just past the blank line ("\n\r\n" or "\n\n") that ends the head,
+   HUNK itself when the very first data does not begin with "HTTP",
+   or NULL when no terminator has been found yet.  */
+static const char *
+head_terminator (const char *hunk, int oldlen, int peeklen)
+{
+  const char *scan;
+  const char *limit = hunk + oldlen + peeklen;
+
+  /* If at first peek, verify whether HUNK starts with "HTTP".  If
+     not, this is a HTTP/0.9 response and we must bail out without
+     reading anything.  */
+  if (oldlen == 0)
+    {
+      if (memcmp (hunk, "HTTP", MIN (peeklen, 4)) != 0)
+        return hunk;
+    }
+
+  /* Resume up to four bytes before the new data so that a terminator
+     straddling the old/new boundary is still recognized.  */
+  scan = (oldlen >= 4) ? hunk + oldlen - 4 : hunk;
+
+  /* SCAN[1] is always readable inside the loop; SCAN[2] is guarded
+     separately.  */
+  while (scan < limit - 1)
+    {
+      if (*scan == '\n')
+        {
+          if (scan[1] == '\n')
+            return scan + 2;
+          if (scan[1] == '\r' && scan < limit - 2 && scan[2] == '\n')
+            return scan + 3;
+        }
+      ++scan;
+    }
+  return NULL;
+}
+
+/* Read the HTTP response head from FD and return it. The error
+ conditions are the same as with fd_read_hunk.
+
+ To support HTTP/0.9 responses, this function tries to make sure
+ that the data begins with "HTTP". If this is not the case, no data
+ is read and an empty head is returned, so that the remaining
+ data can be treated as body.
+
+ Both the "HTTP" check and the search for the end of the head are
+ done by head_terminator, which is handed to fd_read_hunk together
+ with the constant 512 (presumably a buffer size hint -- confirm
+ against fd_read_hunk). */
+
+static char *
+fd_read_http_head (int fd)
+{
+ return fd_read_hunk (fd, head_terminator, 512);
+}
+
+struct response {
+ /* The response data. response_new stores the caller's HEAD pointer
+    here as is; response_free does not free it. */
+ const char *data;
+
+ /* The array of pointers that indicate where each line of the head
+    starts, followed by a NULL entry. For example, given three lines
+    "foo", "bar", and "baz":
+
+      foo: value\r\nbar: value\r\nbaz: value\r\n\r\n
+      ^0            ^1            ^2            ^3
+
+    I.e. headers[0] points to the beginning of foo, headers[1] points
+    to the end of foo and the beginning of bar, etc.; the last
+    non-NULL entry points to the empty line that ends the head.
+    response_status reads entry 0 as the status line, and
+    response_header_bounds starts its search at entry 1.
+    response_new does not set this member for an empty (HTTP/0.9)
+    head, and readers test it against NULL (this assumes xnew0
+    zero-fills the structure -- confirm). */
+ const char **headers;
+};
+
+/* Allocate a struct response for HEAD and index the start of each of
+   its lines.  HEAD is referenced, not copied.  For an empty HEAD the
+   HEADERS member is not set at all.  The caller releases the result
+   with response_free.  */
+static struct response *
+response_new (const char *head)
+{
+  const char *line;
+  int nused = 0, nalloc = 0;
+  struct response *resp = xnew0 (struct response);
+
+  resp->data = head;
+
+  /* An empty head denotes a headerless (HTTP/0.9) response; HEADERS
+     is deliberately left unset in that case.  */
+  if (head[0] == '\0')
+    return resp;
+
+  /* Record the start of every line once, so that the
+     response_header_* functions need not re-split HEAD on each
+     lookup.  */
+  for (line = head; ; )
+    {
+      DO_REALLOC (resp->headers, nalloc, nused + 1, const char *);
+      resp->headers[nused++] = line;
+
+      /* The empty line (or the end of the data) closes the head.  */
+      if (line[0] == '\0' || line[0] == '\n'
+          || (line[0] == '\r' && line[1] == '\n'))
+        break;
+
+      /* Step over this header, including any continuation lines,
+         which begin with a space or a tab.  */
+      do
+        {
+          const char *nl = strchr (line, '\n');
+          line = nl ? nl + 1 : line + strlen (line);
+        }
+      while (*line == ' ' || *line == '\t');
+    }
+
+  /* Terminate the index with a NULL entry.  */
+  DO_REALLOC (resp->headers, nalloc, nused + 1, const char *);
+  resp->headers[nused] = NULL;
+
+  return resp;
+}
+
+/* Look up header NAME, compared case-insensitively, in RESP.  When
+   found, store the bounds of its value, with whitespace trimmed from
+   both ends, in *BEGPTR and *ENDPTR and return 1.  Return 0, leaving
+   both pointers untouched, when RESP has no header index or no such
+   header.  Entry 0 of the index is never searched.  */
+static int
+response_header_bounds (const struct response *resp, const char *name,
+                        const char **begptr, const char **endptr)
+{
+  const char **hp = resp->headers;
+  int name_len;
+
+  if (hp == NULL || hp[1] == NULL)
+    return 0;
+
+  name_len = strlen (name);
+
+  /* HP[0] is the start of the current line and HP[1] its end.  */
+  for (++hp; hp[1] != NULL; ++hp)
+    {
+      const char *val = hp[0];
+      const char *lim = hp[1];
+
+      /* The line must be longer than NAME, so that VAL[NAME_LEN] can
+         be inspected, and must read "NAME:".  */
+      if (lim - val <= name_len)
+        continue;
+      if (val[name_len] != ':')
+        continue;
+      if (strncasecmp (val, name, name_len) != 0)
+        continue;
+
+      val += name_len + 1;
+      while (val < lim && ISSPACE (*val))
+        ++val;
+      while (val < lim && ISSPACE (lim[-1]))
+        --lim;
+      *begptr = val;
+      *endptr = lim;
+      return 1;
+    }
+  return 0;
+}
+
+/* Copy the value of header NAME from RESP into BUF, which has room
+   for BUFSIZE bytes.  The value is truncated to at most BUFSIZE - 1
+   bytes and is always NUL-terminated.  BUF is left untouched when
+   BUFSIZE is not positive.  Returns 1 if the header was found, 0
+   otherwise.  */
+static int
+response_header_copy (const struct response *resp, const char *name,
+                      char *buf, int bufsize)
+{
+  const char *b, *e;
+  if (!response_header_bounds (resp, name, &b, &e))
+    return 0;
+  if (bufsize > 0)
+    {
+      /* Keep one byte for the terminator: copying up to BUFSIZE
+         bytes and then storing '\0' at BUF[BUFSIZE] would write one
+         byte past the end of BUF for long values.  */
+      int len = MIN (e - b, bufsize - 1);
+      memcpy (buf, b, len);
+      buf[len] = '\0';
+    }
+  return 1;
+}
+
+/* Return a copy of the value of header NAME in RESP, made with
+   strdupdelim, or NULL if RESP contains no such header.  */
+static char *
+response_header_strdup (const struct response *resp, const char *name)
+{
+  const char *val_beg, *val_end;
+  int found = response_header_bounds (resp, name, &val_beg, &val_end);
+
+  return found ? strdupdelim (val_beg, val_end) : NULL;
+}
+
/* Parse the HTTP status line, which is of format:
HTTP-Version SP Status-Code SP Reason-Phrase
- The function returns the status-code, or -1 if the status line is
- malformed. The pointer to reason-phrase is returned in RP. */
+ The function returns the status-code, or -1 if the status line
+ appears malformed. The pointer to "reason-phrase" message is
+ returned in *MESSAGE. */
+
static int
-parse_http_status_line (const char *line, const char **reason_phrase_ptr)
+response_status (const struct response *resp, char **message)
{
- /* (the variables must not be named `major' and `minor', because
- that breaks compilation with SunOS4 cc.) */
- int mjr, mnr, statcode;
- const char *p;
+ int status;
+ const char *p, *end;
- *reason_phrase_ptr = NULL;
+ if (!resp->headers)
+ {
+ /* For a HTTP/0.9 response, always assume 200 response. */
+ if (message)
+ *message = xstrdup ("OK");
+ return 200;
+ }
- /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is
- major version, and Y is minor version. */
- if (strncmp (line, "HTTP/", 5) != 0)
- return -1;
- line += 5;
+ p = resp->headers[0];
+ end = resp->headers[1];
- /* Calculate major HTTP version. */
- p = line;
- for (mjr = 0; ISDIGIT (*line); line++)
- mjr = 10 * mjr + (*line - '0');
- if (*line != '.' || p == line)
+ if (!end)
return -1;
- ++line;
- /* Calculate minor HTTP version. */
- p = line;
- for (mnr = 0; ISDIGIT (*line); line++)
- mnr = 10 * mnr + (*line - '0');
- if (*line != ' ' || p == line)
- return -1;
- /* Wget will accept only 1.0 and higher HTTP-versions. The value of
- minor version can be safely ignored. */
- if (mjr < 1)
+ /* "HTTP" */
+ if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
return -1;
- ++line;
+ p += 4;
+
+ /* "/x.x" (optional because some Gnutella servers have been reported
+ as not sending the "/x.x" part). */
+ if (p < end && *p == '/')
+ {
+ ++p;
+ while (p < end && ISDIGIT (*p))
+ ++p;
+ if (p < end && *p == '.')
+ ++p;
+ while (p < end && ISDIGIT (*p))
+ ++p;
+ }
- /* Calculate status code. */
- if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2])))
+ while (p < end && ISSPACE (*p))
+ ++p;
+ if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
return -1;
- statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0');
- /* Set up the reason phrase pointer. */
- line += 3;
- /* RFC2068 requires SPC here, but we allow the string to finish
- here, in case no reason-phrase is present. */
- if (*line != ' ')
+ status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
+ p += 3;
+
+ if (message)
{
- if (!*line)
- *reason_phrase_ptr = line;
- else
- return -1;
+ while (p < end && ISSPACE (*p))
+ ++p;
+ while (p < end && ISSPACE (end[-1]))
+ --end;
+ *message = strdupdelim (p, end);
}
- else
- *reason_phrase_ptr = line + 1;
- return statcode;
+ return status;
}
-\f
-/* Functions to be used as arguments to header_process(): */
-struct http_process_range_closure {
- long first_byte_pos;
- long last_byte_pos;
- long entity_length;
-};
+/* Release RESP and its header index, if it has one.  The response
+   data that RESP refers to is not freed here.  */
+static void
+response_free (struct response *resp)
+{
+  if (resp->headers != NULL)
+    xfree (resp->headers);
+  xfree (resp);
+}
+
+/* Log, at LOG_VERBOSE level, the text between B and E after dropping
+   at most one trailing LF and then at most one trailing CR.  */
+static void
+print_server_response_1 (const char *b, const char *e)
+{
+  char *line;
+
+  if (e > b && e[-1] == '\n')
+    e--;
+  if (e > b && e[-1] == '\r')
+    e--;
+
+  /* Make a NUL-terminated copy of the range so that it can be
+     printed with %s.  */
+  BOUNDED_TO_ALLOCA (b, e, line);
+  logprintf (LOG_VERBOSE, " %s\n", line);
+}
+
+/* Log every indexed line of RESP through print_server_response_1.
+   Nothing is printed when RESP has no header index.  */
+static void
+print_server_response (const struct response *resp)
+{
+  const char **hp;
+
+  if (resp->headers == NULL)
+    return;
+
+  /* HP[0] and HP[1] delimit one line; the last non-NULL entry only
+     serves as the end of the line before it.  */
+  for (hp = resp->headers; hp[1] != NULL; hp++)
+    print_server_response_1 (hp[0], hp[1]);
+}
/* Parse the `Content-Range' header and extract the information it
-contains. Returns 1 if successful, -1 otherwise. */
+contains. Returns 1 if successful, 0 otherwise. */
static int
-http_process_range (const char *hdr, void *arg)
+parse_content_range (const char *hdr, long *first_byte_ptr,
+ long *last_byte_ptr, long *entity_length_ptr)
{
- struct http_process_range_closure *closure
- = (struct http_process_range_closure *)arg;
long num;
- /* Certain versions of Nutscape proxy server send out
- `Content-Length' without "bytes" specifier, which is a breach of
- RFC2068 (as well as the HTTP/1.1 draft which was current at the
- time). But hell, I must support it... */
+ /* Ancient versions of Netscape proxy server, presumably predating
+ rfc2068, sent out `Content-Range' without the "bytes"
+ specifier. */
if (!strncasecmp (hdr, "bytes", 5))
{
hdr += 5;
HTTP spec. */
if (*hdr == ':')
++hdr;
- hdr += skip_lws (hdr);
+ while (ISSPACE (*hdr))
+ ++hdr;
if (!*hdr)
return 0;
}
num = 10 * num + (*hdr - '0');
if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
return 0;
- closure->first_byte_pos = num;
+ *first_byte_ptr = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
return 0;
- closure->last_byte_pos = num;
+ *last_byte_ptr = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- closure->entity_length = num;
+ *entity_length_ptr = num;
return 1;
}
+\f
+/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
+ PROMISED_SIZE bytes are sent over the wire -- if the file is
+ longer, read only that much; if the file is shorter, report an error. */
-/* Place 1 to ARG if the HDR contains the word "none", 0 otherwise.
- Used for `Accept-Ranges'. */
static int
-http_process_none (const char *hdr, void *arg)
+post_file (int sock, const char *file_name, long promised_size)
{
- int *where = (int *)arg;
+ static char chunk[8192];
+ long written = 0;
+ int write_error;
+ FILE *fp;
- if (strstr (hdr, "none"))
- *where = 1;
- else
- *where = 0;
- return 1;
-}
+ DEBUGP (("[writing POST file %s ... ", file_name));
-/* Place the malloc-ed copy of HDR hdr, to the first `;' to ARG. */
-static int
-http_process_type (const char *hdr, void *arg)
-{
- char **result = (char **)arg;
- /* Locate P on `;' or the terminating zero, whichever comes first. */
- const char *p = strchr (hdr, ';');
- if (!p)
- p = hdr + strlen (hdr);
- while (p > hdr && ISSPACE (*(p - 1)))
- --p;
- *result = strdupdelim (hdr, p);
- return 1;
-}
+ fp = fopen (file_name, "rb");
+ if (!fp)
+ return -1;
+ while (!feof (fp) && written < promised_size)
+ {
+ int towrite;
+ int length = fread (chunk, 1, sizeof (chunk), fp);
+ if (length == 0)
+ break;
+ towrite = MIN (promised_size - written, length);
+ write_error = fd_write (sock, chunk, towrite, -1);
+ if (write_error < 0)
+ {
+ fclose (fp);
+ return -1;
+ }
+ written += towrite;
+ }
+ fclose (fp);
-/* Check whether the `Connection' header is set to "keep-alive". */
-static int
-http_process_connection (const char *hdr, void *arg)
-{
- int *flag = (int *)arg;
- if (!strcasecmp (hdr, "Keep-Alive"))
- *flag = 1;
- return 1;
+ /* If we've written less than was promised, report a (probably
+ nonsensical) error rather than break the promise. */
+ if (written < promised_size)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ assert (written == promised_size);
+ DEBUGP (("done]\n"));
+ return 0;
}
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
make it such. The persistence data is stored in the variables
- below. Ideally, it would be in a structure, and it should be
- possible to cache an arbitrary fixed number of these connections.
-
- I think the code is quite easy to extend in that direction. */
+ below. Ideally, it should be possible to cache an arbitrary fixed
+ number of these connections. */
/* Whether a persistent connection is active. */
-static int pc_active_p;
-/* Host and port of currently active persistent connection. */
-static struct address_list *pc_last_host_ip;
-static unsigned short pc_last_port;
+static int pconn_active;
-/* File descriptor of the currently active persistent connection. */
-static int pc_last_fd;
+static struct {
+ /* The socket of the connection. */
+ int socket;
-#ifdef HAVE_SSL
-/* Whether a ssl handshake has occoured on this connection */
-static int pc_active_ssl;
-/* SSL connection of the currently active persistent connection. */
-static SSL *pc_last_ssl;
-#endif /* HAVE_SSL */
+ /* Host and port of the currently active persistent connection. */
+ char *host;
+ int port;
+
+ /* Whether an SSL handshake has occurred on this connection. */
+ int ssl;
+} pconn;
-/* Mark the persistent connection as invalid. This is used by the
- CLOSE_* macros after they forcefully close a registered persistent
- connection. This does not close the file descriptor -- it is left
- to the caller to do that. (Maybe it should, though.) */
+/* Mark the persistent connection as invalid, close its socket, and
+ free the resources it uses. Among other callers, the CLOSE_* macros
+ use this instead of fd_close when the descriptor they are closing
+ is the registered persistent connection. */
static void
invalidate_persistent (void)
{
- pc_active_p = 0;
-#ifdef HAVE_SSL
- pc_active_ssl = 0;
-#endif /* HAVE_SSL */
- if (pc_last_host_ip != NULL)
- {
- address_list_release (pc_last_host_ip);
- pc_last_host_ip = NULL;
- }
- DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
+ DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
+ pconn_active = 0;
+ fd_close (pconn.socket);
+ xfree (pconn.host);
+ xzero (pconn);
}
/* Register FD, which should be a TCP/IP connection to HOST:PORT, as
If a previous connection was persistent, it is closed. */
-#ifdef HAVE_SSL
static void
-register_persistent (const char *host, unsigned short port, int fd, SSL *ssl)
+register_persistent (const char *host, int port, int fd, int ssl)
{
-#else
-static void
-register_persistent (const char *host, unsigned short port, int fd)
-{
-#endif
- if (pc_active_p)
+ if (pconn_active)
{
- if (pc_last_fd == fd)
+ if (pconn.socket == fd)
{
- /* The connection FD is already registered. Nothing to
- do. */
+ /* The connection FD is already registered. */
return;
}
else
{
- /* The old persistent connection is still active; let's
- close it first. This situation arises whenever a
- persistent connection exists, but we then connect to a
- different host, and try to register a persistent
- connection to that one. */
-#ifdef HAVE_SSL
- /* The ssl disconnect has to take place before the closing
- of pc_last_fd. */
- if (pc_last_ssl)
- shutdown_ssl(pc_last_ssl);
-#endif
- CLOSE (pc_last_fd);
+ /* The old persistent connection is still active; close it
+ first. This situation arises whenever a persistent
+ connection exists, but we then connect to a different
+ host, and try to register a persistent connection to that
+ one. */
invalidate_persistent ();
}
}
- assert (pc_last_host_ip == NULL);
+ pconn_active = 1;
+ pconn.socket = fd;
+ pconn.host = xstrdup (host);
+ pconn.port = port;
+ pconn.ssl = ssl;
- /* This lookup_host cannot fail, because it has the results in the
- cache. */
- pc_last_host_ip = lookup_host (host, 1);
- assert (pc_last_host_ip != NULL);
-
- pc_last_port = port;
- pc_last_fd = fd;
- pc_active_p = 1;
-#ifdef HAVE_SSL
- pc_last_ssl = ssl;
- pc_active_ssl = ssl ? 1 : 0;
-#endif
- DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
+ DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
}
-#ifdef HAVE_SSL
-# define SHUTDOWN_SSL(ssl) do { \
- if (ssl) \
- shutdown_ssl (ssl); \
-} while (0)
-#else
-# define SHUTDOWN_SSL(ssl)
-#endif
-
/* Return non-zero if a persistent connection is available for
connecting to HOST:PORT. */
-#ifdef HAVE_SSL
static int
-persistent_available_p (const char *host, unsigned short port, int ssl)
+persistent_available_p (const char *host, int port, int ssl,
+ int *host_lookup_failed)
{
-#else
-static int
-persistent_available_p (const char *host, unsigned short port)
-{
-#endif
- int success;
- struct address_list *this_host_ip;
-
/* First, check whether a persistent connection is active at all. */
- if (!pc_active_p)
- return 0;
- /* Second, check if the active connection pertains to the correct
- (HOST, PORT) ordered pair. */
- if (port != pc_last_port)
+ if (!pconn_active)
return 0;
-#ifdef HAVE_SSL
- /* Second, a): check if current connection is (not) ssl, too. This
- test is unlikely to fail because HTTP and HTTPS typicaly use
- different ports. Yet it is possible, or so I [Christian
- Fraenkel] have been told, to run HTTPS and HTTP simultaneus on
- the same port. */
- if (ssl != pc_active_ssl)
+ /* If we want SSL and the last connection wasn't or vice versa,
+ don't use it. Checking for host and port is not enough because
+ HTTP and HTTPS can apparently coexist on the same port. */
+ if (ssl != pconn.ssl)
return 0;
-#endif /* HAVE_SSL */
- this_host_ip = lookup_host (host, 1);
- if (!this_host_ip)
+ /* If we're not connecting to the same port, we're not interested. */
+ if (port != pconn.port)
return 0;
- /* To equate the two host names for the purposes of persistent
- connections, they need to share all the IP addresses in the
- list. */
- success = address_list_match_all (pc_last_host_ip, this_host_ip);
- address_list_release (this_host_ip);
- if (!success)
- return 0;
+ /* If the host is the same, we're in business. If not, there is
+ still hope -- read below. */
+ if (0 != strcasecmp (host, pconn.host))
+ {
+ /* If pconn.socket is already talking to HOST, we needn't
+ reconnect. This happens often when both sites are virtual
+ hosts distinguished only by name and served by the same
+ network interface, and hence the same web server (possibly
+ set up by the ISP and serving many different web sites).
+ This admittedly non-standard optimization does not contradict
+ HTTP and works well with popular server software. */
+
+ int found;
+ ip_address ip;
+ struct address_list *al;
+
+ if (ssl)
+ /* Don't try to talk to two different SSL sites over the same
+ secure connection! (Besides, it's not clear if name-based
+ virtual hosting is even possible with SSL.) */
+ return 0;
- /* Third: check whether the connection is still open. This is
+ /* If pconn.socket's peer is one of the IP addresses HOST
+ resolves to, pconn.socket is for all intents and purposes
+ already talking to HOST. */
+
+ if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
+ {
+ /* Can't get the peer's address -- something must be very
+ wrong with the connection. */
+ invalidate_persistent ();
+ return 0;
+ }
+ al = lookup_host (host, 0);
+ if (!al)
+ {
+ *host_lookup_failed = 1;
+ return 0;
+ }
+
+ found = address_list_contains (al, &ip);
+ address_list_release (al);
+
+ if (!found)
+ return 0;
+
+ /* The persistent connection's peer address was found among the
+ addresses HOST resolved to; therefore, pconn.socket is in fact
+ already talking to HOST -- no need to reconnect. */
+ }
+
+ /* Finally, check whether the connection is still open. This is
important because most server implement a liberal (short) timeout
on persistent connections. Wget can of course always reconnect
if the connection doesn't work out, but it's nicer to know in
advance. This test is a logical followup of the first test, but
is "expensive" and therefore placed at the end of the list. */
- if (!test_socket_open (pc_last_fd))
+
+ if (!test_socket_open (pconn.socket))
{
/* Oops, the socket is no longer open. Now that we know that,
let's invalidate the persistent connection before returning
0. */
- CLOSE (pc_last_fd);
-#ifdef HAVE_SSL
- SHUTDOWN_SSL (pc_last_ssl);
- pc_last_ssl = NULL;
-#endif
invalidate_persistent ();
return 0;
}
+
return 1;
}
#define CLOSE_FINISH(fd) do { \
if (!keep_alive) \
{ \
- SHUTDOWN_SSL (ssl); \
- CLOSE (fd); \
- if (pc_active_p && (fd) == pc_last_fd) \
+ if (pconn_active && (fd) == pconn.socket) \
invalidate_persistent (); \
+ else \
+ fd_close (fd); \
} \
} while (0)
#define CLOSE_INVALIDATE(fd) do { \
- SHUTDOWN_SSL (ssl); \
- CLOSE (fd); \
- if (pc_active_p && (fd) == pc_last_fd) \
+ if (pconn_active && (fd) == pconn.socket) \
invalidate_persistent (); \
+ else \
+ fd_close (fd); \
} while (0)
\f
struct http_stat
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
- long dltime; /* time of the download */
+ double dltime; /* time of the download in msecs */
int no_truncate; /* whether truncating the file is
forbidden. */
const char *referer; /* value of the referer header. */
static void
free_hstat (struct http_stat *hs)
{
- FREE_MAYBE (hs->newloc);
- FREE_MAYBE (hs->remote_time);
- FREE_MAYBE (hs->error);
+ xfree_null (hs->newloc);
+ xfree_null (hs->remote_time);
+ xfree_null (hs->error);
/* Guard against being called twice. */
hs->newloc = NULL;
const char *));
static int known_authentication_scheme_p PARAMS ((const char *));
-time_t http_atotm PARAMS ((char *));
+time_t http_atotm PARAMS ((const char *));
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
will print it if there is enough information to do so (almost
always), returning the error to the caller (i.e. http_loop).
- Various HTTP parameters are stored to hs. Although it parses the
- response code correctly, it is not used in a sane way. The caller
- can do that, though.
+ Various HTTP parameters are stored to hs.
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
char *pragma_h, *referer, *useragent, *range, *wwwauth;
char *authenticate_h;
char *proxyauth;
- char *all_headers;
char *port_maybe;
char *request_keep_alive;
- int sock, hcount, num_written, all_length, statcode;
+ int sock, statcode;
+ int write_error;
long contlen, contrange;
struct url *conn;
FILE *fp;
int auth_tried_already;
- struct rbuf rbuf;
-#ifdef HAVE_SSL
- static SSL_CTX *ssl_ctx = NULL;
- SSL *ssl = NULL;
-#endif /* HAVE_SSL */
+ int using_ssl = 0;
char *cookies = NULL;
+ char *head;
+ struct response *resp;
+ char hdrval[256];
+ char *message;
+ char *set_cookie;
+
/* Whether this connection will be kept alive after the HTTP request
is done. */
int keep_alive;
- /* Flags that detect the two ways of specifying HTTP keep-alive
- response. */
- int http_keep_alive_1, http_keep_alive_2;
+ /* Flag that detects having received a keep-alive response. */
+ int keep_alive_confirmed;
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive;
"Host: symbolic-name:1234". */
int squares_around_host = 0;
+ /* Headers sent when using POST. */
+ char *post_content_type, *post_content_length;
+ long post_data_size = 0;
+
+ int host_lookup_failed;
+
#ifdef HAVE_SSL
- /* initialize ssl_ctx on first run */
- if (!ssl_ctx)
+ /* Initialize the SSL context. After the first run, this is a
+ no-op. */
+ switch (ssl_init ())
{
- uerr_t err = init_ssl (&ssl_ctx);
- if (err != 0)
- {
- switch (err)
- {
- case SSLERRCTXCREATE:
- /* this is fatal */
- logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
- ssl_printerrors ();
- return err;
- case SSLERRCERTFILE:
- /* try without certfile */
- logprintf (LOG_NOTQUIET,
- _("Failed to load certificates from %s\n"),
- opt.sslcertfile);
- ssl_printerrors ();
- logprintf (LOG_NOTQUIET,
- _("Trying without the specified certificate\n"));
- break;
- case SSLERRCERTKEY:
- logprintf (LOG_NOTQUIET,
- _("Failed to get certificate key from %s\n"),
- opt.sslcertkey);
- ssl_printerrors ();
- logprintf (LOG_NOTQUIET,
- _("Trying without the specified certificate\n"));
- break;
- default:
- break;
- }
- }
+ case SSLERRCTXCREATE:
+ /* this is fatal */
+ logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
+ return SSLERRCTXCREATE;
+ case SSLERRCERTFILE:
+ /* try without certfile */
+ logprintf (LOG_NOTQUIET,
+ _("Failed to load certificates from %s\n"),
+ opt.sslcertfile);
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ case SSLERRCERTKEY:
+ logprintf (LOG_NOTQUIET,
+ _("Failed to get certificate key from %s\n"),
+ opt.sslcertkey);
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ default:
+ break;
}
#endif /* HAVE_SSL */
know the local filename so we can save to it. */
assert (*hs->local_file != NULL);
- authenticate_h = 0;
+ authenticate_h = NULL;
auth_tried_already = 0;
inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
for the Digest authorization scheme.) */
keep_alive = 0;
- http_keep_alive_1 = http_keep_alive_2 = 0;
+ keep_alive_confirmed = 0;
+
+ post_content_type = NULL;
+ post_content_length = NULL;
/* Initialize certain elements of struct http_stat. */
hs->len = 0L;
server. */
conn = proxy ? proxy : u;
+ host_lookup_failed = 0;
+
/* First: establish the connection. */
if (inhibit_keep_alive
- ||
-#ifndef HAVE_SSL
- !persistent_available_p (conn->host, conn->port)
+ || !persistent_available_p (conn->host, conn->port,
+#ifdef HAVE_SSL
+ u->scheme == SCHEME_HTTPS
#else
- !persistent_available_p (conn->host, conn->port,
- u->scheme == SCHEME_HTTPS)
-#endif /* HAVE_SSL */
- )
+ 0
+#endif
+ , &host_lookup_failed))
{
- struct address_list *al = lookup_host (conn->host, 0);
- if (!al)
+ /* In its current implementation, persistent_available_p will
+ look up conn->host in some cases. If that lookup failed, we
+ don't need to bother with connect_to_host. */
+ if (host_lookup_failed)
return HOSTERR;
- set_connection_host_name (conn->host);
- sock = connect_to_many (al, conn->port, 0);
- set_connection_host_name (NULL);
- address_list_release (al);
- if (sock < 0)
- return errno == ECONNREFUSED ? CONREFUSED : CONERROR;
+ sock = connect_to_host (conn->host, conn->port);
+ if (sock == E_HOST)
+ return HOSTERR;
+ else if (sock < 0)
+ return (retryable_socket_connect_error (errno)
+ ? CONERROR : CONIMPOSSIBLE);
#ifdef HAVE_SSL
if (conn->scheme == SCHEME_HTTPS)
- if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
- {
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
- ssl_printerrors ();
- CLOSE (sock);
- return CONSSLERR;
- }
+ {
+ if (!ssl_connect (sock))
+ {
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET,
+ _("Unable to establish SSL connection.\n"));
+ fd_close (sock);
+ return CONSSLERR;
+ }
+ using_ssl = 1;
+ }
#endif /* HAVE_SSL */
}
else
{
- logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
- conn->host, conn->port);
- /* #### pc_last_fd should be accessed through an accessor
- function. */
- sock = pc_last_fd;
-#ifdef HAVE_SSL
- ssl = pc_last_ssl;
-#endif /* HAVE_SSL */
+ logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
+ pconn.host, pconn.port);
+ sock = pconn.socket;
+ using_ssl = pconn.ssl;
DEBUGP (("Reusing fd %d.\n", sock));
}
- command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
+ if (*dt & HEAD_ONLY)
+ command = "HEAD";
+ else if (opt.post_file_name || opt.post_data)
+ command = "POST";
+ else
+ command = "GET";
referer = NULL;
if (hs->referer)
request_keep_alive = NULL;
if (opt.cookies)
- cookies = build_cookies_request (u->host, u->port, u->path,
+ cookies = cookie_header (wget_cookie_jar, u->host, u->port, u->path,
#ifdef HAVE_SSL
- u->scheme == SCHEME_HTTPS
+ u->scheme == SCHEME_HTTPS
#else
- 0
+ 0
#endif
- );
+ );
+
+ if (opt.post_data || opt.post_file_name)
+ {
+ post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n";
+ if (opt.post_data)
+ post_data_size = strlen (opt.post_data);
+ else
+ {
+ post_data_size = file_size (opt.post_file_name);
+ if (post_data_size == -1)
+ {
+ logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
+ opt.post_file_name);
+ post_data_size = 0;
+ }
+ }
+ post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1);
+ sprintf (post_content_length,
+ "Content-Length: %ld\r\n", post_data_size);
+ }
if (proxy)
full_path = xstrdup (u->url);
+ (proxyauth ? strlen (proxyauth) : 0)
+ (range ? strlen (range) : 0)
+ strlen (pragma_h)
+ + (post_content_type
+ ? strlen (post_content_type) : 0)
+ + (post_content_length
+ ? strlen (post_content_length) : 0)
+ (opt.user_header ? strlen (opt.user_header) : 0)
+ 64);
/* Construct the request. */
User-Agent: %s\r\n\
Host: %s%s%s%s\r\n\
Accept: %s\r\n\
-%s%s%s%s%s%s%s%s\r\n",
+%s%s%s%s%s%s%s%s%s%s\r\n",
command, full_path,
useragent,
squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "",
wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "",
range ? range : "",
- pragma_h,
+ pragma_h,
+ post_content_type ? post_content_type : "",
+ post_content_length ? post_content_length : "",
opt.user_header ? opt.user_header : "");
- DEBUGP (("---request begin---\n%s---request end---\n", request));
+ DEBUGP (("\n---request begin---\n%s", request));
/* Free the temporary memory. */
- FREE_MAYBE (wwwauth);
- FREE_MAYBE (proxyauth);
- FREE_MAYBE (cookies);
+ xfree_null (wwwauth);
+ xfree_null (proxyauth);
+ xfree_null (cookies);
xfree (full_path);
/* Send the request to server. */
-#ifdef HAVE_SSL
- if (conn->scheme == SCHEME_HTTPS)
- num_written = ssl_iwrite (ssl, request, strlen (request));
- else
-#endif /* HAVE_SSL */
- num_written = iwrite (sock, request, strlen (request));
+ write_error = fd_write (sock, request, strlen (request), -1);
- if (num_written < 0)
+ if (write_error >= 0)
+ {
+ if (opt.post_data)
+ {
+ DEBUGP (("[POST data: %s]\n", opt.post_data));
+ write_error = fd_write (sock, opt.post_data, post_data_size, -1);
+ }
+ else if (opt.post_file_name && post_data_size != 0)
+ write_error = post_file (sock, opt.post_file_name, post_data_size);
+ }
+ DEBUGP (("---request end---\n"));
+
+ if (write_error < 0)
{
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
strerror (errno));
statcode = -1;
*dt &= ~RETROKF;
- /* Before reading anything, initialize the rbuf. */
- rbuf_initialize (&rbuf, sock);
-#ifdef HAVE_SSL
- if (conn->scheme == SCHEME_HTTPS)
- rbuf.ssl = ssl;
- else
- rbuf.ssl = NULL;
-#endif /* HAVE_SSL */
- all_headers = NULL;
- all_length = 0;
- /* Header-fetching loop. */
- hcount = 0;
- while (1)
+ head = fd_read_http_head (sock);
+ if (!head)
{
- char *hdr;
- int status;
-
- ++hcount;
- /* Get the header. */
- status = header_get (&rbuf, &hdr,
- /* Disallow continuations for status line. */
- (hcount == 1 ? HG_NO_CONTINUATIONS : HG_NONE));
-
- /* Check for errors. */
- if (status == HG_EOF && *hdr)
+ logputs (LOG_VERBOSE, "\n");
+ if (errno == 0)
{
- /* This used to be an unconditional error, but that was
- somewhat controversial, because of a large number of
- broken CGI's that happily "forget" to send the second EOL
- before closing the connection of a HEAD request.
-
- So, the deal is to check whether the header is empty
- (*hdr is zero if it is); if yes, it means that the
- previous header was fully retrieved, and that -- most
- probably -- the request is complete. "...be liberal in
- what you accept." Oh boy. */
- logputs (LOG_VERBOSE, "\n");
- logputs (LOG_NOTQUIET, _("End of file while parsing headers.\n"));
- xfree (hdr);
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
+ logputs (LOG_NOTQUIET, _("No data received.\n"));
CLOSE_INVALIDATE (sock);
return HEOF;
}
- else if (status == HG_ERROR)
+ else
{
- logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
strerror (errno));
- xfree (hdr);
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
CLOSE_INVALIDATE (sock);
return HERR;
}
+ }
- /* If the headers are to be saved to a file later, save them to
- memory now. */
- if (opt.save_headers)
- {
- int lh = strlen (hdr);
- all_headers = (char *)xrealloc (all_headers, all_length + lh + 2);
- memcpy (all_headers + all_length, hdr, lh);
- all_length += lh;
- all_headers[all_length++] = '\n';
- all_headers[all_length] = '\0';
- }
+ DEBUGP (("\n---response begin---\n"));
+ DEBUGP (("%s", head));
+ DEBUGP (("---response end---\n"));
- /* Check for status line. */
- if (hcount == 1)
- {
- const char *error;
- /* Parse the first line of server response. */
- statcode = parse_http_status_line (hdr, &error);
- hs->statcode = statcode;
- /* Store the descriptive response. */
- if (statcode == -1) /* malformed response */
- {
- /* A common reason for "malformed response" error is the
- case when no data was actually received. Handle this
- special case. */
- if (!*hdr)
- hs->error = xstrdup (_("No data received"));
- else
- hs->error = xstrdup (_("Malformed status line"));
- xfree (hdr);
- break;
- }
- else if (!*error)
- hs->error = xstrdup (_("(no description)"));
- else
- hs->error = xstrdup (error);
+ resp = response_new (head);
- if ((statcode != -1)
-#ifdef DEBUG
- && !opt.debug
-#endif
- )
- {
- if (opt.server_response)
- logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
- else
- logprintf (LOG_VERBOSE, "%2d %s", statcode, error);
- }
+ /* Check for status line. */
+ message = NULL;
+ statcode = response_status (resp, &message);
+ if (!opt.server_response)
+ logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
+ else
+ {
+ logprintf (LOG_VERBOSE, "\n");
+ print_server_response (resp);
+ }
- goto done_header;
- }
+ hs->statcode = statcode;
+ if (statcode == -1)
+ hs->error = xstrdup (_("Malformed status line"));
+ else if (!*message)
+ hs->error = xstrdup (_("(no description)"));
+ else
+ hs->error = xstrdup (message);
- /* Exit on empty header. */
- if (!*hdr)
+ if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
+ contlen = strtol (hdrval, NULL, 10);
+ type = response_header_strdup (resp, "Content-Type");
+ if (type)
+ {
+ char *tmp = strchr (type, ';');
+ if (tmp)
{
- xfree (hdr);
- break;
+ while (tmp > type && ISSPACE (tmp[-1]))
+ --tmp;
+ *tmp = '\0';
}
+ }
+ hs->newloc = response_header_strdup (resp, "Location");
+ hs->remote_time = response_header_strdup (resp, "Last-Modified");
+ set_cookie = response_header_strdup (resp, "Set-Cookie");
+ if (set_cookie)
+ {
+ /* The jar should have been created by now. */
+ assert (wget_cookie_jar != NULL);
+ cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
+ set_cookie);
+ xfree (set_cookie);
+ }
+ authenticate_h = response_header_strdup (resp, "WWW-Authenticate");
+ if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
+ {
+ long first_byte_pos, last_byte_pos, entity_length;
+ if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
+ &entity_length))
+ contrange = first_byte_pos;
+ }
- /* Print the header if requested. */
- if (opt.server_response && hcount != 1)
- logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
-
- /* Try getting content-length. */
- if (contlen == -1 && !opt.ignore_length)
- if (header_process (hdr, "Content-Length", header_extract_number,
- &contlen))
- goto done_header;
- /* Try getting content-type. */
- if (!type)
- if (header_process (hdr, "Content-Type", http_process_type, &type))
- goto done_header;
- /* Try getting location. */
- if (!hs->newloc)
- if (header_process (hdr, "Location", header_strdup, &hs->newloc))
- goto done_header;
- /* Try getting last-modified. */
- if (!hs->remote_time)
- if (header_process (hdr, "Last-Modified", header_strdup,
- &hs->remote_time))
- goto done_header;
- /* Try getting cookies. */
- if (opt.cookies)
- if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u))
- goto done_header;
- /* Try getting www-authentication. */
- if (!authenticate_h)
- if (header_process (hdr, "WWW-Authenticate", header_strdup,
- &authenticate_h))
- goto done_header;
- /* Check for accept-ranges header. If it contains the word
- `none', disable the ranges. */
- if (*dt & ACCEPTRANGES)
- {
- int nonep;
- if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep))
- {
- if (nonep)
- *dt &= ~ACCEPTRANGES;
- goto done_header;
- }
- }
- /* Try getting content-range. */
- if (contrange == -1)
- {
- struct http_process_range_closure closure;
- if (header_process (hdr, "Content-Range", http_process_range, &closure))
- {
- contrange = closure.first_byte_pos;
- goto done_header;
- }
- }
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive)
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (response_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = 1;
+ else if (response_header_copy (resp, "Connection", hdrval,
+ sizeof (hdrval)))
{
- /* Check for the `Keep-Alive' header. */
- if (!http_keep_alive_1)
- {
- if (header_process (hdr, "Keep-Alive", header_exists,
- &http_keep_alive_1))
- goto done_header;
- }
- /* Check for `Connection: Keep-Alive'. */
- if (!http_keep_alive_2)
- {
- if (header_process (hdr, "Connection", http_process_connection,
- &http_keep_alive_2))
- goto done_header;
- }
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = 1;
}
- done_header:
- xfree (hdr);
}
+ response_free (resp);
- logputs (LOG_VERBOSE, "\n");
-
- if (contlen != -1
- && (http_keep_alive_1 || http_keep_alive_2))
- {
- assert (inhibit_keep_alive == 0);
- keep_alive = 1;
- }
if (keep_alive)
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
-#ifndef HAVE_SSL
- register_persistent (conn->host, conn->port, sock);
-#else
- register_persistent (conn->host, conn->port, sock, ssl);
-#endif /* HAVE_SSL */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
&& authenticate_h)
{
/* Authorization is required. */
- FREE_MAYBE (type);
+ xfree_null (type);
type = NULL;
free_hstat (hs);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
hs->newloc ? _(" [following]") : "");
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
+ xfree_null (type);
return NEWLOCATION;
}
}
- if (type && !strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)))
+ /* If content-type is not given, assume text/html. This is because
+ of the multitude of broken CGI's that "forget" to generate the
+ content-type. */
+ if (!type ||
+ 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
- /* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
if (opt.html_extension && (*dt & TEXTHTML))
{
char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
- if (last_period_in_local_filename == NULL ||
- !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
- strcasecmp(last_period_in_local_filename, ".html") == EQ))
+ if (last_period_in_local_filename == NULL
+ || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
+ || 0 == strcasecmp (last_period_in_local_filename, ".html")))
{
size_t local_filename_len = strlen(*hs->local_file);
hs->res = 0;
/* Mark as successfully retrieved. */
*dt |= RETROKF;
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
+ xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
return RETRUNNEEDED;
\n\
Continued download failed on this file, which conflicts with `-c'.\n\
Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
+ xfree_null (type);
CLOSE_INVALIDATE (sock);
return CONTNOTSUPPORTED;
}
{
/* This means the whole request was somehow misunderstood by the
server. Bail out. */
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
+ xfree_null (type);
CLOSE_INVALIDATE (sock);
return RANGEERR;
}
logputs (LOG_VERBOSE, "\n");
}
}
- FREE_MAYBE (type);
+ xfree_null (type);
type = NULL; /* We don't need it any more. */
/* Return if we have no intention of further downloading. */
/* In case the caller cares to look... */
hs->len = 0L;
hs->res = 0;
- FREE_MAYBE (type);
- FREE_MAYBE (all_headers);
+ xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
return RETRFINISHED;
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
- FREE_MAYBE (all_headers);
return FOPENERR;
}
}
#### A possible solution to this would be to remember the
file position in the output document and to seek to that
- position, instead of rewinding. */
- if (!hs->restval && global_download_count == 0)
+ position, instead of rewinding.
+
+ We don't truncate stdout, since that breaks
+ "wget -O - [...] >> foo".
+ */
+ if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
{
/* This will silently fail for streams that don't correspond
to regular files, but that's OK. */
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
if (opt.save_headers)
- fwrite (all_headers, 1, all_length, fp);
+ fwrite (head, 1, strlen (head), fp);
/* Get the contents of the document. */
- hs->res = get_contents (sock, fp, &hs->len, hs->restval,
+ hs->res = fd_read_body (sock, fp, &hs->len, hs->restval,
(contlen != -1 ? contlen : 0),
- &rbuf, keep_alive, &hs->dltime);
+ keep_alive, &hs->dltime);
if (hs->res >= 0)
CLOSE_FINISH (sock);
if (flush_res == EOF)
hs->res = -2;
}
- FREE_MAYBE (all_headers);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
int use_ts, got_head = 0; /* time-stamping info */
char *filename_plus_orig_suffix;
char *local_filename = NULL;
- char *tms, *suf, *locf, *tmrate;
+ char *tms, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
/* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
FTP or whatever. */
- if (opt.cookies && opt.cookies_input && !cookies_loaded_p)
+ if (opt.cookies)
{
- load_cookies (opt.cookies_input);
- cookies_loaded_p = 1;
+ if (!wget_cookie_jar)
+ wget_cookie_jar = cookie_jar_new ();
+ if (opt.cookies_input && !cookies_loaded_p)
+ {
+ cookie_jar_load (wget_cookie_jar, opt.cookies_input);
+ cookies_loaded_p = 1;
+ }
}
*newloc = NULL;
hstat.local_file = local_file;
else if (local_file)
{
- *local_file = url_filename (u);
+ *local_file = url_file_name (u);
hstat.local_file = local_file;
}
else
{
- dummy = url_filename (u);
+ dummy = url_file_name (u);
hstat.local_file = &dummy;
}
*dt |= RETROKF;
/* #### Bogusness alert. */
- /* If its suffix is "html" or "htm", assume text/html. */
- if (((suf = suffix (*hstat.local_file)) != NULL)
- && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (*hstat.local_file))
*dt |= TEXTHTML;
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
}
use_ts = 0;
if (opt.timestamping)
{
- boolean local_dot_orig_file_exists = FALSE;
+ int local_dot_orig_file_exists = 0;
if (opt.backup_converted)
/* If -K is specified, we'll act on the assumption that it was specified
/* Try to stat() the .orig file. */
if (stat (filename_plus_orig_suffix, &st) == 0)
{
- local_dot_orig_file_exists = TRUE;
+ local_dot_orig_file_exists = 1;
local_filename = filename_plus_orig_suffix;
}
}
{
use_ts = 1;
tml = st.st_mtime;
+#ifdef WINDOWS
+ /* Modification time granularity is 2 seconds for Windows, so
+ increase local time by 1 second for later comparison. */
+ tml++;
+#endif
local_size = st.st_size;
got_head = 0;
}
printwhat (count, opt.ntry);
continue;
break;
- case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
+ case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return err;
break;
case FWRITEERR: case FOPENERR:
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
*hstat.local_file, strerror (errno));
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return err;
break;
case CONSSLERR:
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return err;
break;
case NEWLOCATION:
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return WRONGCODE;
}
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return NEWLOCATION;
break;
case RETRUNNEEDED:
/* The file was already fully retrieved. */
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
break;
case RETRFINISHED:
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return WRONGCODE;
}
Server file no newer than local file `%s' -- not retrieving.\n\n"),
local_filename);
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
}
else if (tml >= tmr)
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
}
tms, u->url, hstat.len, hstat.contlen, locf, count);
}
++opt.numurls;
- downloaded_increase (hstat.len);
+ total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
}
else if (hstat.res == 0) /* No read error */
tms, u->url, hstat.len, locf, count);
}
++opt.numurls;
- downloaded_increase (hstat.len);
+ total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
"%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
- downloaded_increase (hstat.len);
+ total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
- FREE_MAYBE (dummy);
+ xfree_null (dummy);
return RETROK;
}
else /* the same, but not accepted */
it is not assigned to the FSF. So I stuck it with strptime. */
time_t
-http_atotm (char *time_string)
+http_atotm (const char *time_string)
{
/* NOTE: Solaris strptime man page claims that %n and %t match white
space, but that's not universally available. Instead, we simply
GNU strptime does not have this problem because it recognizes
both international and local dates. */
- for (i = 0; i < ARRAY_SIZE (time_formats); i++)
+ for (i = 0; i < countof (time_formats); i++)
if (check_end (strptime (time_string, time_formats[i], &t)))
return mktime_from_utc (&t);
return res;
}
+/* Advance X past any run of characters for which ISSPACE is true.
+   X must be a modifiable pointer lvalue: it is dereferenced and
+   incremented in place, and it is evaluated once per loop test and
+   once per increment, so do not pass an expression with side
+   effects.  The do/while (0) wrapper lets the macro be used as a
+   single statement followed by a semicolon.  */
+#define SKIP_WS(x) do { \
+ while (ISSPACE (*(x))) \
+ ++(x); \
+} while (0)
+
#ifdef USE_DIGEST
/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
of a field in such a header. If the field is the one specified by
cp += strlen (attr_name);
if (!*cp)
return -1;
- cp += skip_lws (cp);
+ SKIP_WS (cp);
if (*cp != '=')
return -1;
if (!*++cp)
return -1;
- cp += skip_lws (cp);
+ SKIP_WS (cp);
if (*cp != '\"')
return -1;
if (!*++cp)
;
if (!*ep)
return -1;
- FREE_MAYBE (*ret);
+ xfree_null (*ret);
*ret = strdupdelim (cp, ep);
return ep - au + 1;
}
for (i = 0; i < MD5_HASHLEN; i++, hash++)
{
- *buf++ = XDIGIT_TO_xchar (*hash >> 4);
- *buf++ = XDIGIT_TO_xchar (*hash & 0xf);
+ *buf++ = XNUM_TO_digit (*hash >> 4);
+ *buf++ = XNUM_TO_digit (*hash & 0xf);
}
*buf = '\0';
}
{
int i;
- au += skip_lws (au);
- for (i = 0; i < ARRAY_SIZE (options); i++)
+ SKIP_WS (au);
+ for (i = 0; i < countof (options); i++)
{
int skip = extract_header_attr (au, options[i].name,
options[i].variable);
if (skip < 0)
{
- FREE_MAYBE (realm);
- FREE_MAYBE (opaque);
- FREE_MAYBE (nonce);
+ xfree_null (realm);
+ xfree_null (opaque);
+ xfree_null (nonce);
return NULL;
}
else if (skip)
break;
}
}
- if (i == ARRAY_SIZE (options))
+ if (i == countof (options))
{
while (*au && *au != '=')
au++;
if (*au && *++au)
{
- au += skip_lws (au);
+ SKIP_WS (au);
if (*au == '\"')
{
au++;
}
if (!realm || !nonce || !user || !passwd || !path || !method)
{
- FREE_MAYBE (realm);
- FREE_MAYBE (opaque);
- FREE_MAYBE (nonce);
+ xfree_null (realm);
+ xfree_null (opaque);
+ xfree_null (nonce);
return NULL;
}
+/* Cleanup entry point for the HTTP code.  It takes no arguments and
+   returns nothing.  The body is empty because there is nothing left
+   to release here.
+   NOTE(review): presumably kept so that existing callers keep
+   working -- confirm before removing.  */
void
http_cleanup (void)
{
- if (pc_last_host_ip)
- address_list_release (pc_last_host_ip);
}