#include "utils.h"
#include "url.h"
#include "host.h"
-#include "rbuf.h"
#include "retr.h"
-#include "headers.h"
#include "connect.h"
#include "netrc.h"
#ifdef HAVE_SSL
extern char *version_string;
extern LARGE_INT total_downloaded_bytes;
+#ifndef MIN
+# define MIN(x, y) ((x) > (y) ? (y) : (x))
+#endif
+
\f
static int cookies_loaded_p;
struct cookie_jar *wget_cookie_jar;
#define HTTP_STATUS_BAD_GATEWAY 502
#define HTTP_STATUS_UNAVAILABLE 503
-\f
-/* Parse the HTTP status line, which is of format:
+/* Terminator callback that fd_read_http_head passes to fd_read_hunk.
+ HUNK spans OLDLEN + PEEKLEN bytes; OLDLEN is 0 on the first peek.
+ Returns a pointer just past the empty line ("\n\r\n" or "\n\n")
+ that ends the head, HUNK itself when the first peek does not start
+ with "HTTP", or NULL when no terminator is present in the data
+ seen so far. */
+static const char *
+head_terminator (const char *hunk, int oldlen, int peeklen)
+{
+ const char *start, *end;
- HTTP-Version SP Status-Code SP Reason-Phrase
+ /* If at first peek, verify whether HUNK starts with "HTTP". If
+ not, this is an HTTP/0.9 response and we must bail out without
+ reading anything. */
+ if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
+ return hunk;
- The function returns the status-code, or -1 if the status line is
- malformed. The pointer to reason-phrase is returned in RP. */
-static int
-parse_http_status_line (const char *line, const char **reason_phrase_ptr)
+ /* Rescan the last four bytes of the older data (or all of it, if
+ shorter) -- presumably so that a terminator split across two
+ peeks is still found. */
+ if (oldlen < 4)
+ start = hunk;
+ else
+ start = hunk + oldlen - 4;
+ end = hunk + oldlen + peeklen;
+
+ for (; start < end - 1; start++)
+ if (*start == '\n')
+ {
+ if (start < end - 2
+ && start[1] == '\r'
+ && start[2] == '\n')
+ return start + 3;
+ if (start[1] == '\n')
+ return start + 2;
+ }
+ return NULL;
+}
+
+/* Read the HTTP response head from FD and return it. The error
+ conditions are the same as with fd_read_hunk.
+
+ To support HTTP/0.9 responses, this function tries to make sure
+ that the data begins with "HTTP". If this is not the case, no data
+ is read and an empty head is returned, so that the remaining
+ data can be treated as body. */
+
+static char *
+fd_read_http_head (int fd)
{
- /* (the variables must not be named `major' and `minor', because
- that breaks compilation with SunOS4 cc.) */
- int mjr, mnr, statcode;
- const char *p;
+ return fd_read_hunk (fd, head_terminator, 512);
+}
- *reason_phrase_ptr = NULL;
+/* Index over an HTTP response head. The structure only points into
+ DATA; response_free releases HEADERS and the structure itself,
+ not DATA. */
+struct response {
+ /* The response data. */
+ const char *data;
+
+ /* The array of pointers that indicate where each header starts.
+ For example, given three headers "foo", "bar", and "baz":
+ foo: value\r\nbar: value\r\nbaz: value\r\n\r\n
+ 0             1             2             3
+ I.e. headers[0] points to the beginning of foo, headers[1] points
+ to the end of foo and the beginning of bar, etc. response_new
+ terminates the array with a NULL pointer and leaves HEADERS
+ unset altogether for an empty (HTTP/0.9) head. */
+ const char **headers;
+};
- /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is
- major version, and Y is minor version. */
- if (strncmp (line, "HTTP/", 5) != 0)
- return -1;
- line += 5;
+/* Create a struct response that indexes HEAD. HEAD is not copied:
+ resp->data and every headers[] entry point into it, so HEAD has to
+ stay valid for as long as the result is used. The result is
+ released with response_free. */
+static struct response *
+response_new (const char *head)
+{
+ const char *hdr;
+ int count, size;
- /* Calculate major HTTP version. */
- p = line;
- for (mjr = 0; ISDIGIT (*line); line++)
- mjr = 10 * mjr + (*line - '0');
- if (*line != '.' || p == line)
- return -1;
- ++line;
+ /* NOTE(review): the code below relies on resp->headers starting out
+ as NULL -- presumably xnew0 zero-fills; confirm. */
+ struct response *resp = xnew0 (struct response);
+ resp->data = head;
- /* Calculate minor HTTP version. */
- p = line;
- for (mnr = 0; ISDIGIT (*line); line++)
- mnr = 10 * mnr + (*line - '0');
- if (*line != ' ' || p == line)
- return -1;
- /* Wget will accept only 1.0 and higher HTTP-versions. The value of
- minor version can be safely ignored. */
- if (mjr < 1)
- return -1;
- ++line;
+ if (*head == '\0')
+ {
+ /* Empty head means that we're dealing with a headerless
+ (HTTP/0.9) response. In that case, don't set HEADERS at
+ all. */
+ return resp;
+ }
- /* Calculate status code. */
- if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2])))
- return -1;
- statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0');
+ /* Split HEAD into header lines, so that response_header_* functions
+ don't need to do this over and over again. */
- /* Set up the reason phrase pointer. */
- line += 3;
- /* RFC2068 requires SPC here, but we allow the string to finish
- here, in case no reason-phrase is present. */
- if (*line != ' ')
+ size = count = 0;
+ hdr = head;
+ while (1)
{
- if (!*line)
- *reason_phrase_ptr = line;
- else
- return -1;
+ DO_REALLOC (resp->headers, size, count + 1, const char *);
+ resp->headers[count++] = hdr;
+
+ /* Break upon encountering an empty line. */
+ if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
+ break;
+
+ /* Find the end of HDR, including continuations. */
+ do
+ {
+ const char *end = strchr (hdr, '\n');
+ if (end)
+ hdr = end + 1;
+ else
+ hdr += strlen (hdr);
+ }
+ while (*hdr == ' ' || *hdr == '\t');
}
- else
- *reason_phrase_ptr = line + 1;
+ /* NULL-terminate the index; the lookup and printing functions stop
+ at the first NULL entry. */
+ DO_REALLOC (resp->headers, size, count + 1, const char *);
+ resp->headers[count++] = NULL;
- return statcode;
+ return resp;
}
-\f
-#define WMIN(x, y) ((x) > (y) ? (y) : (x))
-
-/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
- PROMISED_SIZE bytes are sent over the wire -- if the file is
- longer, read only that much; if the file is shorter, report an error. */
+/* Locate the first header in RESP whose name equals NAME, compared
+ case-insensitively. On success, store the beginning and the end of
+ its value -- with leading and trailing whitespace excluded -- in
+ *BEGPTR and *ENDPTR and return 1. Return 0, leaving both pointers
+ untouched, if RESP has no header index or no such header. */
static int
-post_file (int sock, const char *file_name, long promised_size)
+response_header_bounds (const struct response *resp, const char *name,
+ const char **begptr, const char **endptr)
{
- static char chunk[8192];
- long written = 0;
- int write_error;
- FILE *fp;
+ int i;
+ const char **headers = resp->headers;
+ int name_len;
- DEBUGP (("[writing POST file %s ... ", file_name));
+ if (!headers || !headers[1])
+ return 0;
- fp = fopen (file_name, "rb");
- if (!fp)
- return -1;
- while (!feof (fp) && written < promised_size)
+ name_len = strlen (name);
+
+ /* Start at index 1: headers[0] is the line that response_status
+ parses as the status line. */
+ for (i = 1; headers[i + 1]; i++)
{
- int towrite;
- int length = fread (chunk, 1, sizeof (chunk), fp);
- if (length == 0)
- break;
- towrite = WMIN (promised_size - written, length);
- write_error = xwrite (sock, chunk, towrite, -1);
- if (write_error < 0)
+ const char *b = headers[i];
+ const char *e = headers[i + 1];
+ if (e - b > name_len
+ && b[name_len] == ':'
+ && 0 == strncasecmp (b, name, name_len))
{
- fclose (fp);
- return -1;
+ b += name_len + 1;
+ while (b < e && ISSPACE (*b))
+ ++b;
+ while (b < e && ISSPACE (e[-1]))
+ --e;
+ *begptr = b;
+ *endptr = e;
+ return 1;
}
- written += towrite;
}
- fclose (fp);
+ return 0;
+}
- /* If we've written less than was promised, report a (probably
- nonsensical) error rather than break the promise. */
- if (written < promised_size)
+/* Copy the value of the first header named NAME in RESP into BUF,
+   which is BUFSIZE bytes large.  The value is truncated to at most
+   BUFSIZE - 1 bytes so that the terminating '\0' always fits inside
+   BUF.  If BUFSIZE is 0, BUF is not touched and only the presence of
+   the header is reported.  Returns 1 if the header exists, 0
+   otherwise.  */
+static int
+response_header_copy (const struct response *resp, const char *name,
+		      char *buf, int bufsize)
+{
+  const char *b, *e;
+  if (!response_header_bounds (resp, name, &b, &e))
+    return 0;
+  if (bufsize)
{
- errno = EINVAL;
- return -1;
+      /* Reserve one byte for the terminator.  Copying up to BUFSIZE
+	 bytes and then storing '\0' at buf[BUFSIZE] would write one
+	 byte past the end of BUF for any value that long.  */
+      int len = MIN (e - b, bufsize - 1);
+      memcpy (buf, b, len);
+      buf[len] = '\0';
}
+  return 1;
+}
- assert (written == promised_size);
- DEBUGP (("done]\n"));
- return 0;
+/* Return a copy, made with strdupdelim, of the value of the first
+   header named NAME in RESP, or NULL if RESP has no such header.  */
+static char *
+response_header_strdup (const struct response *resp, const char *name)
+{
+  const char *val_beg, *val_end;
+  return (response_header_bounds (resp, name, &val_beg, &val_end)
+	  ? strdupdelim (val_beg, val_end)
+	  : NULL);
}
-\f
-/* Functions to be used as arguments to header_process(): */
-struct http_process_range_closure {
- long first_byte_pos;
- long last_byte_pos;
- long entity_length;
-};
+/* Extract the status code from the first line of RESP, which is
+   expected to have the form:
+
+       HTTP-Version SP Status-Code SP Reason-Phrase
+
+   Returns the three-digit status code, or -1 if the line does not
+   look like a status line.  A headerless (HTTP/0.9) response is
+   reported as 200 with the message "OK".  If MESSAGE is non-NULL and
+   a status code is returned, *MESSAGE receives a copy of the reason
+   phrase with surrounding whitespace removed; on the -1 paths
+   *MESSAGE is left untouched.  */
+
+static int
+response_status (const struct response *resp, char **message)
+{
+  const char *cur, *limit;
+  int code;
+
+  if (resp->headers == NULL)
+    {
+      /* HTTP/0.9 has no status line, so always assume success.  */
+      if (message != NULL)
+	*message = xstrdup ("OK");
+      return 200;
+    }
+
+  cur = resp->headers[0];
+  limit = resp->headers[1];
+  if (limit == NULL)
+    return -1;
+
+  /* The line has to open with the literal "HTTP".  */
+  if (limit - cur < 4 || strncmp (cur, "HTTP", 4) != 0)
+    return -1;
+  cur += 4;
+
+  /* "/x.x" is optional, because some Gnutella servers have been
+     reported as not sending that part.  */
+  if (cur < limit && *cur == '/')
+    {
+      for (++cur; cur < limit && ISDIGIT (*cur); ++cur)
+	continue;
+      if (cur < limit && *cur == '.')
+	++cur;
+      for (; cur < limit && ISDIGIT (*cur); ++cur)
+	continue;
+    }
+
+  for (; cur < limit && ISSPACE (*cur); ++cur)
+    continue;
+
+  /* Three decimal digits make up the status code.  */
+  if (limit - cur < 3)
+    return -1;
+  if (!ISDIGIT (cur[0]) || !ISDIGIT (cur[1]) || !ISDIGIT (cur[2]))
+    return -1;
+  code = (cur[0] - '0') * 100 + (cur[1] - '0') * 10 + (cur[2] - '0');
+  cur += 3;
+
+  if (message != NULL)
+    {
+      /* Trim whitespace on both sides of the reason phrase.  */
+      for (; cur < limit && ISSPACE (*cur); ++cur)
+	continue;
+      for (; cur < limit && ISSPACE (limit[-1]); --limit)
+	continue;
+      *message = strdupdelim (cur, limit);
+    }
+
+  return code;
+}
+
+/* Release RESP together with its header index. RESP->data is not
+ freed here. */
+static void
+response_free (struct response *resp)
+{
+ /* HEADERS is never set for an empty head; presumably xfree_null
+ tolerates a NULL argument -- confirm. */
+ xfree_null (resp->headers);
+ xfree (resp);
+}
+
+/* Log the header line that spans [B, E) at LOG_VERBOSE level, minus
+   one trailing "\n" and one trailing "\r" if present.  */
+static void
+print_server_response_1 (const char *b, const char *e)
+{
+  const char *stop = e;
+  char *text;
+
+  if (stop > b && stop[-1] == '\n')
+    stop--;
+  if (stop > b && stop[-1] == '\r')
+    stop--;
+
+  BOUNDED_TO_ALLOCA (b, stop, text);
+  logprintf (LOG_VERBOSE, " %s\n", text);
+}
+
+/* Log every line indexed in RESP, the first line included.  Does
+   nothing when RESP carries no header index.  */
+static void
+print_server_response (const struct response *resp)
+{
+  const char **line;
+
+  if (resp->headers == NULL)
+    return;
+
+  /* Entry N + 1 marks the end of line N; the index is NULL-terminated.  */
+  for (line = resp->headers; line[1] != NULL; line++)
+    print_server_response_1 (line[0], line[1]);
+}
/* Parse the `Content-Range' header and extract the information it
contains. Returns 1 if successful, -1 otherwise. */
static int
-http_process_range (const char *hdr, void *arg)
+parse_content_range (const char *hdr, long *first_byte_ptr,
+ long *last_byte_ptr, long *entity_length_ptr)
{
- struct http_process_range_closure *closure
- = (struct http_process_range_closure *)arg;
long num;
- /* Certain versions of Nutscape proxy server send out
- `Content-Length' without "bytes" specifier, which is a breach of
- RFC2068 (as well as the HTTP/1.1 draft which was current at the
- time). But hell, I must support it... */
+ /* Ancient versions of Netscape proxy server, presumably predating
+ rfc2068, sent out `Content-Range' without the "bytes"
+ specifier. */
if (!strncasecmp (hdr, "bytes", 5))
{
hdr += 5;
HTTP spec. */
if (*hdr == ':')
++hdr;
- hdr += skip_lws (hdr);
+ while (ISSPACE (*hdr))
+ ++hdr;
if (!*hdr)
return 0;
}
num = 10 * num + (*hdr - '0');
if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
return 0;
- closure->first_byte_pos = num;
+ *first_byte_ptr = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
return 0;
- closure->last_byte_pos = num;
+ *last_byte_ptr = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- closure->entity_length = num;
- return 1;
-}
-
-/* Place 1 to ARG if the HDR contains the word "none", 0 otherwise.
- Used for `Accept-Ranges'. */
-static int
-http_process_none (const char *hdr, void *arg)
-{
- int *where = (int *)arg;
-
- if (strstr (hdr, "none"))
- *where = 1;
- else
- *where = 0;
- return 1;
-}
-
-/* Place the malloc-ed copy of HDR hdr, to the first `;' to ARG. */
-static int
-http_process_type (const char *hdr, void *arg)
-{
- char **result = (char **)arg;
- /* Locate P on `;' or the terminating zero, whichever comes first. */
- const char *p = strchr (hdr, ';');
- if (!p)
- p = hdr + strlen (hdr);
- while (p > hdr && ISSPACE (*(p - 1)))
- --p;
- *result = strdupdelim (hdr, p);
+ *entity_length_ptr = num;
return 1;
}
+\f
+/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
+ PROMISED_SIZE bytes are sent over the wire -- if the file is
+ longer, read only that much; if the file is shorter, report an error.
+
+ Returns 0 on success and -1 on failure: the file could not be
+ opened, a write failed, or the file was shorter than
+ PROMISED_SIZE (in which case errno is set to EINVAL). */
-/* Check whether the `Connection' header is set to "keep-alive". */
static int
-http_process_connection (const char *hdr, void *arg)
+post_file (int sock, const char *file_name, long promised_size)
{
- int *flag = (int *)arg;
- if (!strcasecmp (hdr, "Keep-Alive"))
- *flag = 1;
- return 1;
-}
+ static char chunk[8192];
+ long written = 0;
+ int write_error;
+ FILE *fp;
-/* Commit the cookie to the cookie jar. */
+ DEBUGP (("[writing POST file %s ... ", file_name));
-int
-http_process_set_cookie (const char *hdr, void *arg)
-{
- struct url *u = (struct url *)arg;
+ fp = fopen (file_name, "rb");
+ if (!fp)
+ return -1;
+ while (!feof (fp) && written < promised_size)
+ {
+ int towrite;
+ int length = fread (chunk, 1, sizeof (chunk), fp);
+ if (length == 0)
+ break;
+ /* Clamp the chunk so that no more than PROMISED_SIZE bytes are
+ sent in total, even when the file is longer. */
+ towrite = MIN (promised_size - written, length);
+ write_error = fd_write (sock, chunk, towrite, -1);
+ if (write_error < 0)
+ {
+ fclose (fp);
+ return -1;
+ }
+ written += towrite;
+ }
+ fclose (fp);
- /* The jar should have been created by now. */
- assert (wget_cookie_jar != NULL);
+ /* If we've written less than was promised, report a (probably
+ nonsensical) error rather than break the promise. */
+ if (written < promised_size)
+ {
+ errno = EINVAL;
+ return -1;
+ }
- cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, hdr);
- return 1;
+ assert (written == promised_size);
+ DEBUGP (("done]\n"));
+ return 0;
}
-
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
make it such. The persistence data is stored in the variables
- below. Ideally, it would be in a structure, and it should be
- possible to cache an arbitrary fixed number of these connections.
-
- I think the code is quite easy to extend in that direction. */
+ below. Ideally, it should be possible to cache an arbitrary fixed
+ number of these connections. */
/* Whether a persistent connection is active. */
-static int pc_active_p;
+static int pconn_active;
-/* Host and port of currently active persistent connection. */
-static struct address_list *pc_last_host_ip;
-static unsigned short pc_last_port;
+/* Record of the one cached persistent connection. It is filled in
+ by register_persistent and wiped by invalidate_persistent. */
+static struct {
+ /* The socket of the connection. */
+ int socket;
-/* File descriptor of the currently active persistent connection. */
-static int pc_last_fd;
+ /* Host and port of the currently active persistent connection.
+ HOST is a copy made with xstrdup in register_persistent and
+ released in invalidate_persistent. */
+ char *host;
+ int port;
-/* Whether a ssl handshake has occoured on this connection */
-static int pc_last_ssl_p;
+ /* Whether an SSL handshake has occurred on this connection. */
+ int ssl;
+} pconn;
-/* Mark the persistent connection as invalid. This is used by the
- CLOSE_* macros after they forcefully close a registered persistent
- connection. This does not close the file descriptor -- it is left
- to the caller to do that. (Maybe it should, though.) */
+/* Mark the persistent connection as invalid and free the resources it
+ uses. This is used by the CLOSE_* macros after they forcefully
+ close a registered persistent connection. */
static void
invalidate_persistent (void)
{
- pc_active_p = 0;
- pc_last_ssl_p = 0;
- if (pc_last_host_ip != NULL)
- {
- address_list_release (pc_last_host_ip);
- pc_last_host_ip = NULL;
- }
- DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
+ DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
+ pconn_active = 0;
+ /* The socket is closed here, so callers must not close it again. */
+ fd_close (pconn.socket);
+ xfree (pconn.host);
+ /* Presumably clears every field of the record, so that no stale
+ socket number or dangling HOST pointer is left behind. */
+ xzero (pconn);
}
/* Register FD, which should be a TCP/IP connection to HOST:PORT, as
If a previous connection was persistent, it is closed. */
static void
-register_persistent (const char *host, unsigned short port, int fd, int ssl)
+register_persistent (const char *host, int port, int fd, int ssl)
{
- if (pc_active_p)
+ if (pconn_active)
{
- if (pc_last_fd == fd)
+ if (pconn.socket == fd)
{
- /* The connection FD is already registered. Nothing to
- do. */
+ /* The connection FD is already registered. */
return;
}
else
{
- /* The old persistent connection is still active; let's
- close it first. This situation arises whenever a
- persistent connection exists, but we then connect to a
- different host, and try to register a persistent
- connection to that one. */
- xclose (pc_last_fd);
+ /* The old persistent connection is still active; close it
+ first. This situation arises whenever a persistent
+ connection exists, but we then connect to a different
+ host, and try to register a persistent connection to that
+ one. */
invalidate_persistent ();
}
}
- assert (pc_last_host_ip == NULL);
+ pconn_active = 1;
+ pconn.socket = fd;
+ /* Keep a private copy of HOST; invalidate_persistent frees it. */
+ pconn.host = xstrdup (host);
+ pconn.port = port;
+ pconn.ssl = ssl;
- /* This lookup_host cannot fail, because it has the results in the
- cache. */
- pc_last_host_ip = lookup_host (host, LH_SILENT);
- assert (pc_last_host_ip != NULL);
-
- pc_last_port = port;
- pc_last_fd = fd;
- pc_active_p = 1;
- pc_last_ssl_p = ssl;
- DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
+ DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
}
/* Return non-zero if a persistent connection is available for
connecting to HOST:PORT. */
+/* SSL says whether the caller wants an SSL connection.
+ *HOST_LOOKUP_FAILED is set to 1 when HOST had to be resolved and
+ the lookup failed; it is never cleared here, so the caller has to
+ initialize it. As a side effect, the persistent connection is
+ invalidated when its peer address cannot be obtained or its socket
+ is found to be closed. */
static int
-persistent_available_p (const char *host, unsigned short port, int ssl)
+persistent_available_p (const char *host, int port, int ssl,
+ int *host_lookup_failed)
{
- int success;
- struct address_list *this_host_ip;
-
/* First, check whether a persistent connection is active at all. */
- if (!pc_active_p)
- return 0;
- /* Second, check if the active connection pertains to the correct
- (HOST, PORT) ordered pair. */
- if (port != pc_last_port)
+ if (!pconn_active)
return 0;
- /* Second, a): check if current connection is (not) ssl, too. This
- test is unlikely to fail because HTTP and HTTPS typicaly use
- different ports. Yet it is possible, or so I [Christian
- Fraenkel] have been told, to run HTTPS and HTTP simultaneus on
- the same port. */
- if (ssl != pc_last_ssl_p)
+ /* If we want SSL and the last connection wasn't or vice versa,
+ don't use it. Checking for host and port is not enough because
+ HTTP and HTTPS can apparently coexist on the same port. */
+ if (ssl != pconn.ssl)
return 0;
- this_host_ip = lookup_host (host, 0);
- if (!this_host_ip)
+ /* If we're not connecting to the same port, we're not interested. */
+ if (port != pconn.port)
return 0;
- /* To equate the two host names for the purposes of persistent
- connections, they need to share all the IP addresses in the
- list. */
- success = address_list_match_all (pc_last_host_ip, this_host_ip);
- address_list_release (this_host_ip);
- if (!success)
- return 0;
+ /* If the host is the same, we're in business. If not, there is
+ still hope -- read below. */
+ if (0 != strcasecmp (host, pconn.host))
+ {
+ /* If pconn.socket is already talking to HOST, we needn't
+ reconnect. This happens often when both sites are virtual
+ hosts distinguished only by name and served by the same
+ network interface, and hence the same web server (possibly
+ set up by the ISP and serving many different web sites).
+ This admittedly non-standard optimization does not contradict
+ HTTP and works well with popular server software. */
+
+ int found;
+ ip_address ip;
+ struct address_list *al;
+
+ if (ssl)
+ /* Don't try to talk to two different SSL sites over the same
+ secure connection! (Besides, it's not clear if name-based
+ virtual hosting is even possible with SSL.) */
+ return 0;
+
+ /* If pconn.socket's peer is one of the IP addresses HOST
+ resolves to, pconn.socket is for all intents and purposes
+ already talking to HOST. */
+
+ if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
+ {
+ /* Can't get the peer's address -- something must be very
+ wrong with the connection. */
+ invalidate_persistent ();
+ return 0;
+ }
+ al = lookup_host (host, 0);
+ if (!al)
+ {
+ *host_lookup_failed = 1;
+ return 0;
+ }
+
+ found = address_list_contains (al, &ip);
+ address_list_release (al);
- /* Third: check whether the connection is still open. This is
+ if (!found)
+ return 0;
+
+ /* The persistent connection's peer address was found among the
+ addresses HOST resolved to; therefore, pconn.socket is in fact
+ already talking to HOST -- no need to reconnect. */
+ }
+
+ /* Finally, check whether the connection is still open. This is
important because most server implement a liberal (short) timeout
on persistent connections. Wget can of course always reconnect
if the connection doesn't work out, but it's nicer to know in
advance. This test is a logical followup of the first test, but
is "expensive" and therefore placed at the end of the list. */
- if (!test_socket_open (pc_last_fd))
+
+ if (!test_socket_open (pconn.socket))
{
/* Oops, the socket is no longer open. Now that we know that,
let's invalidate the persistent connection before returning
0. */
- xclose (pc_last_fd);
invalidate_persistent ();
return 0;
}
+
return 1;
}
+/* Close FD unless the variable `keep_alive' in scope at the point of
+ use is non-zero. If FD is the registered persistent connection,
+ invalidate_persistent does the closing and drops the registration;
+ otherwise FD is closed directly. */
#define CLOSE_FINISH(fd) do { \
if (!keep_alive) \
{ \
- xclose (fd); \
- if (pc_active_p && (fd) == pc_last_fd) \
+ if (pconn_active && (fd) == pconn.socket) \
invalidate_persistent (); \
+ else \
+ fd_close (fd); \
} \
} while (0)
+/* Close FD unconditionally. If FD is the registered persistent
+ connection, invalidate_persistent does the closing and drops the
+ registration; otherwise FD is closed directly. */
#define CLOSE_INVALIDATE(fd) do { \
- xclose (fd); \
- if (pc_active_p && (fd) == pc_last_fd) \
+ if (pconn_active && (fd) == pconn.socket) \
invalidate_persistent (); \
+ else \
+ fd_close (fd); \
} while (0)
\f
struct http_stat
will print it if there is enough information to do so (almost
always), returning the error to the caller (i.e. http_loop).
- Various HTTP parameters are stored to hs. Although it parses the
- response code correctly, it is not used in a sane way. The caller
- can do that, though.
+ Various HTTP parameters are stored to hs.
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
char *pragma_h, *referer, *useragent, *range, *wwwauth;
char *authenticate_h;
char *proxyauth;
- char *all_headers;
char *port_maybe;
char *request_keep_alive;
- int sock, hcount, all_length, statcode;
+ int sock, statcode;
int write_error;
long contlen, contrange;
struct url *conn;
FILE *fp;
int auth_tried_already;
- struct rbuf rbuf;
int using_ssl = 0;
char *cookies = NULL;
+ char *head;
+ struct response *resp;
+ char hdrval[256];
+ char *message;
+ char *set_cookie;
+
/* Whether this connection will be kept alive after the HTTP request
is done. */
int keep_alive;
- /* Flags that detect the two ways of specifying HTTP keep-alive
- response. */
- int http_keep_alive_1, http_keep_alive_2;
+ /* Flag that detects having received a keep-alive response. */
+ int keep_alive_confirmed;
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive;
char *post_content_type, *post_content_length;
long post_data_size = 0;
+ int host_lookup_failed;
+
#ifdef HAVE_SSL
/* Initialize the SSL context. After the first run, this is a
no-op. */
know the local filename so we can save to it. */
assert (*hs->local_file != NULL);
- authenticate_h = 0;
+ authenticate_h = NULL;
auth_tried_already = 0;
inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
for the Digest authorization scheme.) */
keep_alive = 0;
- http_keep_alive_1 = http_keep_alive_2 = 0;
+ keep_alive_confirmed = 0;
post_content_type = NULL;
post_content_length = NULL;
server. */
conn = proxy ? proxy : u;
+ host_lookup_failed = 0;
+
/* First: establish the connection. */
if (inhibit_keep_alive
|| !persistent_available_p (conn->host, conn->port,
#else
0
#endif
- ))
+ , &host_lookup_failed))
{
+ /* In its current implementation, persistent_available_p will
+ look up conn->host in some cases. If that lookup failed, we
+ don't need to bother with connect_to_host. */
+ if (host_lookup_failed)
+ return HOSTERR;
+
sock = connect_to_host (conn->host, conn->port);
if (sock == E_HOST)
return HOSTERR;
else if (sock < 0)
- return CONNECT_ERROR (errno);
+ return (retryable_socket_connect_error (errno)
+ ? CONERROR : CONIMPOSSIBLE);
#ifdef HAVE_SSL
if (conn->scheme == SCHEME_HTTPS)
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET,
_("Unable to establish SSL connection.\n"));
- xclose (sock);
+ fd_close (sock);
return CONSSLERR;
}
using_ssl = 1;
}
else
{
- logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
- conn->host, conn->port);
- /* #### pc_last_fd should be accessed through an accessor
- function. */
- sock = pc_last_fd;
- using_ssl = pc_last_ssl_p;
+ logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
+ pconn.host, pconn.port);
+ sock = pconn.socket;
+ using_ssl = pconn.ssl;
DEBUGP (("Reusing fd %d.\n", sock));
}
post_content_type ? post_content_type : "",
post_content_length ? post_content_length : "",
opt.user_header ? opt.user_header : "");
- DEBUGP (("---request begin---\n%s", request));
+ DEBUGP (("\n---request begin---\n%s", request));
/* Free the temporary memory. */
xfree_null (wwwauth);
xfree (full_path);
/* Send the request to server. */
- write_error = xwrite (sock, request, strlen (request), -1);
+ write_error = fd_write (sock, request, strlen (request), -1);
if (write_error >= 0)
{
if (opt.post_data)
{
DEBUGP (("[POST data: %s]\n", opt.post_data));
- write_error = xwrite (sock, opt.post_data, post_data_size, -1);
+ write_error = fd_write (sock, opt.post_data, post_data_size, -1);
}
else if (opt.post_file_name && post_data_size != 0)
write_error = post_file (sock, opt.post_file_name, post_data_size);
statcode = -1;
*dt &= ~RETROKF;
- /* Before reading anything, initialize the rbuf. */
- rbuf_initialize (&rbuf, sock);
- all_headers = NULL;
- all_length = 0;
- /* Header-fetching loop. */
- hcount = 0;
- while (1)
+ head = fd_read_http_head (sock);
+ if (!head)
{
- char *hdr;
- int status;
-
- ++hcount;
- /* Get the header. */
- status = header_get (&rbuf, &hdr,
- /* Disallow continuations for status line. */
- (hcount == 1 ? HG_NO_CONTINUATIONS : HG_NONE));
-
- /* Check for errors. */
- if (status == HG_EOF && *hdr)
+ logputs (LOG_VERBOSE, "\n");
+ if (errno == 0)
{
- /* This used to be an unconditional error, but that was
- somewhat controversial, because of a large number of
- broken CGI's that happily "forget" to send the second EOL
- before closing the connection of a HEAD request.
-
- So, the deal is to check whether the header is empty
- (*hdr is zero if it is); if yes, it means that the
- previous header was fully retrieved, and that -- most
- probably -- the request is complete. "...be liberal in
- what you accept." Oh boy. */
- logputs (LOG_VERBOSE, "\n");
- logputs (LOG_NOTQUIET, _("End of file while parsing headers.\n"));
- xfree (hdr);
- xfree_null (type);
- xfree_null (all_headers);
+ logputs (LOG_NOTQUIET, _("No data received.\n"));
CLOSE_INVALIDATE (sock);
return HEOF;
}
- else if (status == HG_ERROR)
+ else
{
- logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
strerror (errno));
- xfree (hdr);
- xfree_null (type);
- xfree_null (all_headers);
CLOSE_INVALIDATE (sock);
return HERR;
}
+ }
- /* If the headers are to be saved to a file later, save them to
- memory now. */
- if (opt.save_headers)
- {
- int lh = strlen (hdr);
- all_headers = (char *)xrealloc (all_headers, all_length + lh + 2);
- memcpy (all_headers + all_length, hdr, lh);
- all_length += lh;
- all_headers[all_length++] = '\n';
- all_headers[all_length] = '\0';
- }
+ DEBUGP (("\n---response begin---\n"));
+ DEBUGP (("%s", head));
+ DEBUGP (("---response end---\n"));
- /* Check for status line. */
- if (hcount == 1)
- {
- const char *error;
- /* Parse the first line of server response. */
- statcode = parse_http_status_line (hdr, &error);
- hs->statcode = statcode;
- /* Store the descriptive response. */
- if (statcode == -1) /* malformed response */
- {
- /* A common reason for "malformed response" error is the
- case when no data was actually received. Handle this
- special case. */
- if (!*hdr)
- hs->error = xstrdup (_("No data received"));
- else
- hs->error = xstrdup (_("Malformed status line"));
- xfree (hdr);
- break;
- }
- else if (!*error)
- hs->error = xstrdup (_("(no description)"));
- else
- hs->error = xstrdup (error);
+ resp = response_new (head);
- if ((statcode != -1)
-#ifdef ENABLE_DEBUG
- && !opt.debug
-#endif
- )
- {
- if (opt.server_response)
- logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
- else
- logprintf (LOG_VERBOSE, "%2d %s", statcode, error);
- }
+ /* Check for status line. */
+ message = NULL;
+ statcode = response_status (resp, &message);
+ if (!opt.server_response)
+ logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
+ else
+ {
+ logprintf (LOG_VERBOSE, "\n");
+ print_server_response (resp);
+ }
- goto done_header;
- }
+ hs->statcode = statcode;
+ if (statcode == -1)
+ hs->error = xstrdup (_("Malformed status line"));
+ else if (!*message)
+ hs->error = xstrdup (_("(no description)"));
+ else
+ hs->error = xstrdup (message);
- /* Exit on empty header. */
- if (!*hdr)
+ if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
+ contlen = strtol (hdrval, NULL, 10);
+ type = response_header_strdup (resp, "Content-Type");
+ if (type)
+ {
+ char *tmp = strchr (type, ';');
+ if (tmp)
{
- xfree (hdr);
- break;
+ while (tmp > type && ISSPACE (tmp[-1]))
+ --tmp;
+ *tmp = '\0';
}
+ }
+ hs->newloc = response_header_strdup (resp, "Location");
+ hs->remote_time = response_header_strdup (resp, "Last-Modified");
+ set_cookie = response_header_strdup (resp, "Set-Cookie");
+ if (set_cookie)
+ {
+ /* The jar should have been created by now. */
+ assert (wget_cookie_jar != NULL);
+ cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
+ set_cookie);
+ xfree (set_cookie);
+ }
+ authenticate_h = response_header_strdup (resp, "WWW-Authenticate");
+ if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
+ {
+ long first_byte_pos, last_byte_pos, entity_length;
+ if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
+ &entity_length))
+ contrange = first_byte_pos;
+ }
- /* Print the header if requested. */
- if (opt.server_response && hcount != 1)
- logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
-
- /* Try getting content-length. */
- if (contlen == -1 && !opt.ignore_length)
- if (header_process (hdr, "Content-Length", header_extract_number,
- &contlen))
- goto done_header;
- /* Try getting content-type. */
- if (!type)
- if (header_process (hdr, "Content-Type", http_process_type, &type))
- goto done_header;
- /* Try getting location. */
- if (!hs->newloc)
- if (header_process (hdr, "Location", header_strdup, &hs->newloc))
- goto done_header;
- /* Try getting last-modified. */
- if (!hs->remote_time)
- if (header_process (hdr, "Last-Modified", header_strdup,
- &hs->remote_time))
- goto done_header;
- /* Try getting cookies. */
- if (opt.cookies)
- if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u))
- goto done_header;
- /* Try getting www-authentication. */
- if (!authenticate_h)
- if (header_process (hdr, "WWW-Authenticate", header_strdup,
- &authenticate_h))
- goto done_header;
- /* Check for accept-ranges header. If it contains the word
- `none', disable the ranges. */
- if (*dt & ACCEPTRANGES)
- {
- int nonep;
- if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep))
- {
- if (nonep)
- *dt &= ~ACCEPTRANGES;
- goto done_header;
- }
- }
- /* Try getting content-range. */
- if (contrange == -1)
- {
- struct http_process_range_closure closure;
- if (header_process (hdr, "Content-Range", http_process_range, &closure))
- {
- contrange = closure.first_byte_pos;
- goto done_header;
- }
- }
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive)
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (response_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = 1;
+ else if (response_header_copy (resp, "Connection", hdrval,
+ sizeof (hdrval)))
{
- /* Check for the `Keep-Alive' header. */
- if (!http_keep_alive_1)
- {
- if (header_process (hdr, "Keep-Alive", header_exists,
- &http_keep_alive_1))
- goto done_header;
- }
- /* Check for `Connection: Keep-Alive'. */
- if (!http_keep_alive_2)
- {
- if (header_process (hdr, "Connection", http_process_connection,
- &http_keep_alive_2))
- goto done_header;
- }
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = 1;
}
- done_header:
- xfree (hdr);
}
+ response_free (resp);
- logputs (LOG_VERBOSE, "\n");
-
- if (contlen != -1
- && (http_keep_alive_1 || http_keep_alive_2))
- {
- assert (inhibit_keep_alive == 0);
- keep_alive = 1;
- }
if (keep_alive)
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
xfree_null (type);
- xfree_null (all_headers);
return NEWLOCATION;
}
}
/* Mark as successfully retrieved. */
*dt |= RETROKF;
xfree_null (type);
- xfree_null (all_headers);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
return RETRUNNEEDED;
Continued download failed on this file, which conflicts with `-c'.\n\
Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
xfree_null (type);
- xfree_null (all_headers);
CLOSE_INVALIDATE (sock);
return CONTNOTSUPPORTED;
}
/* This means the whole request was somehow misunderstood by the
server. Bail out. */
xfree_null (type);
- xfree_null (all_headers);
CLOSE_INVALIDATE (sock);
return RANGEERR;
}
hs->len = 0L;
hs->res = 0;
xfree_null (type);
- xfree_null (all_headers);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
return RETRFINISHED;
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
- xfree_null (all_headers);
return FOPENERR;
}
}
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
if (opt.save_headers)
- fwrite (all_headers, 1, all_length, fp);
+ fwrite (head, 1, strlen (head), fp);
/* Get the contents of the document. */
- hs->res = get_contents (sock, fp, &hs->len, hs->restval,
+ hs->res = fd_read_body (sock, fp, &hs->len, hs->restval,
(contlen != -1 ? contlen : 0),
- &rbuf, keep_alive, &hs->dltime);
+ keep_alive, &hs->dltime);
if (hs->res >= 0)
CLOSE_FINISH (sock);
if (flush_res == EOF)
hs->res = -2;
}
- xfree_null (all_headers);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
return res;
}
+/* Advance the pointer lvalue X past every leading character for
+   which ISSPACE is true.  X is evaluated more than once, so it must
+   not have side effects.  */
+#define SKIP_WS(x) do {				\
+  for (; ISSPACE (*(x)); ++(x))			\
+    continue;					\
+} while (0)
+
#ifdef USE_DIGEST
/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
of a field in such a header. If the field is the one specified by
cp += strlen (attr_name);
if (!*cp)
return -1;
- cp += skip_lws (cp);
+ SKIP_WS (cp);
if (*cp != '=')
return -1;
if (!*++cp)
return -1;
- cp += skip_lws (cp);
+ SKIP_WS (cp);
if (*cp != '\"')
return -1;
if (!*++cp)
{
int i;
- au += skip_lws (au);
+ SKIP_WS (au);
for (i = 0; i < countof (options); i++)
{
int skip = extract_header_attr (au, options[i].name,
au++;
if (*au && *++au)
{
- au += skip_lws (au);
+ SKIP_WS (au);
if (*au == '\"')
{
au++;
void
http_cleanup (void)
{
- if (pc_last_host_ip)
- address_list_release (pc_last_host_ip);
+  /* A registered persistent connection owns an open socket and a heap
+     copy of the host name.  Release both here; without this the
+     function would free nothing now that the address list is gone.  */
+  if (pconn_active)
+    invalidate_persistent ();
}