/* HTTP support.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
- Free Software Foundation, Inc.
+ Copyright (C) 2003 Free Software Foundation, Inc.
This file is part of GNU Wget.
extern char *version_string;
extern LARGE_INT total_downloaded_bytes;
+extern FILE *output_stream;
+extern int output_stream_regular;
+
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
/* Some status code validation macros: */
#define H_20X(x) (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
-#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
- || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
+#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
+ || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
+ || (x) == HTTP_STATUS_SEE_OTHER \
|| (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
/* HTTP/1.0 status codes from RFC1945, provided for reference. */
#define HTTP_STATUS_MULTIPLE_CHOICES 300
#define HTTP_STATUS_MOVED_PERMANENTLY 301
#define HTTP_STATUS_MOVED_TEMPORARILY 302
+#define HTTP_STATUS_SEE_OTHER 303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED 304
-#define HTTP_STATUS_TEMPORARY_REDIRECT 307
+#define HTTP_STATUS_TEMPORARY_REDIRECT 307 /* from HTTP/1.1 */
/* Client error 4xx. */
#define HTTP_STATUS_BAD_REQUEST 400
#define HTTP_STATUS_UNAUTHORIZED 401
#define HTTP_STATUS_FORBIDDEN 403
#define HTTP_STATUS_NOT_FOUND 404
+#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
/* Server errors 5xx. */
#define HTTP_STATUS_INTERNAL 500
xfree_null (req->headers);
xfree (req);
}
+
+/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
+ PROMISED_SIZE bytes are sent over the wire -- if the file is
+ longer, read only that much; if the file is shorter, report an error.
+
+ Returns 0 once exactly PROMISED_SIZE bytes have been passed to
+ fd_write. Returns -1 if FILE_NAME cannot be opened, if fd_write
+ returns a negative value, or if fread stops yielding data before
+ PROMISED_SIZE bytes were sent. Only in the last case does this
+ function assign errno itself (EINVAL). On the -1 paths the
+ "[writing POST file" debug message is left without its closing
+ "done]". */
+
+static int
+post_file (int sock, const char *file_name, long promised_size)
+{
+ static char chunk[8192]; /* staging buffer between fread and fd_write */
+ long written = 0; /* bytes handed to fd_write so far */
+ int write_error;
+ FILE *fp;
+
+ DEBUGP (("[writing POST file %s ... ", file_name));
+
+ fp = fopen (file_name, "rb");
+ if (!fp)
+ return -1;
+ while (!feof (fp) && written < promised_size)
+ {
+ int towrite;
+ int length = fread (chunk, 1, sizeof (chunk), fp);
+ if (length == 0) /* nothing was read; the size check after the loop decides the outcome */
+ break;
+ towrite = MIN (promised_size - written, length); /* never send past PROMISED_SIZE, even if the file is longer */
+ write_error = fd_write (sock, chunk, towrite, -1);
+ if (write_error < 0)
+ {
+ fclose (fp); /* close the stream before bailing out */
+ return -1;
+ }
+ written += towrite;
+ }
+ fclose (fp);
+
+ /* If we've written less than was promised, report a (probably
+ nonsensical) error rather than break the promise. */
+ if (written < promised_size)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ assert (written == promised_size); /* towrite is capped above, so written cannot exceed the promise */
+ DEBUGP (("done]\n"));
+ return 0;
+}
\f
static const char *
head_terminator (const char *hunk, int oldlen, int peeklen)
return 0;
if (bufsize)
{
- int len = MIN (e - b, bufsize);
- strncpy (buf, b, len);
+ int len = MIN (e - b, bufsize - 1);
+ memcpy (buf, b, len);
buf[len] = '\0';
}
return 1;
if (!resp->headers)
{
- /* For a HTTP/0.9 response, always assume 200 response. */
+ /* For a HTTP/0.9 response, assume status 200. */
if (message)
*message = xstrdup (_("No headers, assuming HTTP/0.9"));
return 200;
return -1;
p += 4;
- /* "/x.x" (optional because some Gnutella servers have been reported
- as not sending the "/x.x" part. */
+ /* Match the HTTP version. This is optional because Gnutella
+ servers have been reported to not specify HTTP version. */
if (p < end && *p == '/')
{
++p;
*entity_length_ptr = num;
return 1;
}
-\f
-/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
- PROMISED_SIZE bytes are sent over the wire -- if the file is
- longer, read only that much; if the file is shorter, report an error. */
-
-static int
-post_file (int sock, const char *file_name, long promised_size)
-{
- static char chunk[8192];
- long written = 0;
- int write_error;
- FILE *fp;
- DEBUGP (("[writing POST file %s ... ", file_name));
+/* Read the body of the response, but don't store it anywhere and don't
+ display a progress gauge. This is useful for reading the error
+ responses whose bodies don't need to be displayed or logged, but
+ which need to be read anyway. */
- fp = fopen (file_name, "rb");
- if (!fp)
- return -1;
- while (!feof (fp) && written < promised_size)
- {
- int towrite;
- int length = fread (chunk, 1, sizeof (chunk), fp);
- if (length == 0)
- break;
- towrite = MIN (promised_size - written, length);
- write_error = fd_write (sock, chunk, towrite, -1);
- if (write_error < 0)
- {
- fclose (fp);
- return -1;
- }
- written += towrite;
- }
- fclose (fp);
+static void
+skip_short_body (int fd, long contlen) /* reads up to CONTLEN bytes from FD and discards them */
+{
+ /* Skipping the body doesn't make sense if the content length is
+ unknown because, in that case, persistent connections cannot be
+ used. (#### This is not the case with HTTP/1.1 where they can
+ still be used with the magic of the "chunked" transfer!) */
+ if (contlen == -1)
+ return;
+ DEBUGP (("Skipping %ld bytes of body data... ", contlen));
- /* If we've written less than was promised, report a (probably
- nonsensical) error rather than break the promise. */
- if (written < promised_size)
+ while (contlen > 0) /* contlen counts the body bytes still unread */
{
- errno = EINVAL;
- return -1;
+ char dlbuf[512]; /* scratch buffer; its contents are never looked at */
+ int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1); /* ask for no more than what remains */
+ if (ret <= 0) /* fd_read gave no data: give up silently, the caller is not told and "done." is not logged */
+ return;
+ contlen -= ret;
}
-
- assert (written == promised_size);
- DEBUGP (("done]\n"));
- return 0;
+ DEBUGP (("done.\n"));
}
\f
/* Persistent connections. Currently, we cache the most recently used
if (pconn_active && (fd) == pconn.socket) \
invalidate_persistent (); \
else \
- fd_close (fd); \
+ { \
+ fd_close (fd); \
+ fd = -1; \
+ } \
} \
} while (0)
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
- double dltime; /* time of the download in msecs */
- int no_truncate; /* whether truncating the file is
- forbidden. */
+ long rd_size; /* amount of data read from socket */
+ double dltime; /* time it took to download the data */
const char *referer; /* value of the referer header. */
char **local_file; /* local file. */
};
FILE *fp;
int sock = -1;
+ int flags;
/* Whether authorization has been already tried. */
int auth_tried_already = 0;
is done. */
int keep_alive;
- /* Flag that detects having received a keep-alive response. */
- int keep_alive_confirmed;
-
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive = !opt.http_keep_alive;
int host_lookup_failed = 0;
#ifdef HAVE_SSL
- /* Initialize the SSL context. After the first run, this is a
- no-op. */
- switch (ssl_init ())
+ if (u->scheme == SCHEME_HTTPS)
{
- case SSLERRCTXCREATE:
- /* this is fatal */
- logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
- return SSLERRCTXCREATE;
- case SSLERRCERTFILE:
- /* try without certfile */
- logprintf (LOG_NOTQUIET,
- _("Failed to load certificates from %s\n"),
- opt.sslcertfile);
- logprintf (LOG_NOTQUIET,
- _("Trying without the specified certificate\n"));
- break;
- case SSLERRCERTKEY:
- logprintf (LOG_NOTQUIET,
- _("Failed to get certificate key from %s\n"),
- opt.sslcertkey);
- logprintf (LOG_NOTQUIET,
- _("Trying without the specified certificate\n"));
- break;
- default:
- break;
+ /* Initialize the SSL context. After this has once been done,
+ it becomes a no-op. */
+ switch (ssl_init ())
+ {
+ case SSLERRCTXCREATE:
+ /* this is fatal */
+ logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
+ return SSLERRCTXCREATE;
+ case SSLERRCERTFILE:
+ /* try without certfile */
+ logprintf (LOG_NOTQUIET,
+ _("Failed to load certificates from %s\n"),
+ opt.sslcertfile);
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ case SSLERRCERTKEY:
+ logprintf (LOG_NOTQUIET,
+ _("Failed to get certificate key from %s\n"),
+ opt.sslcertkey);
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ default:
+ break;
+ }
}
#endif /* HAVE_SSL */
}
}
request_set_header (req, "Content-Length",
- aprintf ("Content-Length: %ld", post_data_size),
- rel_value);
+ aprintf ("%ld", post_data_size), rel_value);
}
/* Add the user headers. */
for the Digest authorization scheme.) */
keep_alive = 0;
- keep_alive_confirmed = 0;
/* Establish the connection. */
else if (opt.post_file_name && post_data_size != 0)
write_error = post_file (sock, opt.post_file_name, post_data_size);
}
- DEBUGP (("---request end---\n"));
if (write_error < 0)
{
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
proxy ? "Proxy" : "HTTP");
- contlen = contrange = -1;
+ contlen = -1;
+ contrange = 0;
type = NULL;
statcode = -1;
*dt &= ~RETROKF;
print_server_response (resp, " ");
}
+ if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
+ contlen = strtol (hdrval, NULL, 10);
+
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (response_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = 1;
+ else if (response_header_copy (resp, "Connection", hdrval,
+ sizeof (hdrval)))
+ {
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = 1;
+ }
+ }
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
+
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
- CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
- might be more bytes in the body. */
+ skip_short_body (sock, contlen);
+ CLOSE_FINISH (sock);
if (auth_tried_already || !(user && passwd))
{
/* If we have tried it already, then there is not point
else
hs->error = xstrdup (message);
- if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
- contlen = strtol (hdrval, NULL, 10);
type = response_header_strdup (resp, "Content-Type");
if (type)
{
&entity_length))
contrange = first_byte_pos;
}
-
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (response_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = 1;
- else if (response_header_copy (resp, "Connection", hdrval,
- sizeof (hdrval)))
- {
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = 1;
- }
- }
response_free (resp);
- if (keep_alive)
- /* The server has promised that it will not close the connection
- when we're done. This means that we can register it. */
- register_persistent (conn->host, conn->port, sock, using_ssl);
-
/* 20x responses are counted among successful by default. */
if (H_20X (statcode))
*dt |= RETROKF;
_("Location: %s%s\n"),
hs->newloc ? hs->newloc : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
- might be more bytes in the body. */
+ if (keep_alive)
+ skip_short_body (sock, contlen);
+ CLOSE_FINISH (sock);
xfree_null (type);
return NEWLOCATION;
}
}
}
- if (contrange == -1)
+ if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
{
- /* We did not get a content-range header. This means that the
- server did not honor our `Range' request. Normally, this
- means we should reset hs->restval and continue normally. */
-
- /* However, if `-c' is used, we need to be a bit more careful:
-
- 1. If `-c' is specified and the file already existed when
- Wget was started, it would be a bad idea for us to start
- downloading it from scratch, effectively truncating it. I
- believe this cannot happen unless `-c' was specified.
-
- 2. If `-c' is used on a file that is already fully
- downloaded, we're requesting bytes after the end of file,
- which can result in server not honoring `Range'. If this is
- the case, `Content-Length' will be equal to the length of the
- file. */
- if (opt.always_rest)
- {
- /* Check for condition #2. */
- if (hs->restval > 0 /* restart was requested. */
- && contlen != -1 /* we got content-length. */
- && hs->restval >= contlen /* file fully downloaded
- or has shrunk. */
- )
- {
- logputs (LOG_VERBOSE, _("\
+ /* If `-c' is in use and the file has been fully downloaded (or
+ the remote file has shrunk), Wget effectively requests bytes
+ after the end of file and the server responds with 416. */
+ logputs (LOG_VERBOSE, _("\
\n The file is already fully retrieved; nothing to do.\n\n"));
- /* In case the caller inspects. */
- hs->len = contlen;
- hs->res = 0;
- /* Mark as successfully retrieved. */
- *dt |= RETROKF;
- xfree_null (type);
- CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
- might be more bytes in the body. */
- return RETRUNNEEDED;
- }
-
- /* Check for condition #1. */
- if (hs->no_truncate)
- {
- logprintf (LOG_NOTQUIET,
- _("\
-\n\
-Continued download failed on this file, which conflicts with `-c'.\n\
-Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
- xfree_null (type);
- CLOSE_INVALIDATE (sock);
- return CONTNOTSUPPORTED;
- }
-
- /* Fallthrough */
- }
-
- hs->restval = 0;
+ /* In case the caller inspects. */
+ hs->len = contlen;
+ hs->res = 0;
+ /* Mark as successfully retrieved. */
+ *dt |= RETROKF;
+ xfree_null (type);
+ CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
+ might be more bytes in the body. */
+ return RETRUNNEEDED;
}
- else if (contrange != hs->restval ||
- (H_PARTIAL (statcode) && contrange == -1))
+ if ((contrange != 0 && contrange != hs->restval)
+ || (H_PARTIAL (statcode) && !contrange))
{
- /* This means the whole request was somehow misunderstood by the
- server. Bail out. */
+ /* The Range request was somehow misunderstood by the server.
+ Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
return RANGEERR;
}
-
- if (hs->restval)
- {
- if (contlen != -1)
- contlen += contrange;
- else
- contrange = -1; /* If conent-length was not sent,
- content-range will be ignored. */
- }
- hs->contlen = contlen;
+ hs->contlen = contlen + contrange;
if (opt.verbose)
{
logputs (LOG_VERBOSE, _("Length: "));
if (contlen != -1)
{
- logputs (LOG_VERBOSE, legible (contlen));
- if (contrange != -1)
- logprintf (LOG_VERBOSE, _(" (%s to go)"),
- legible (contlen - contrange));
+ logputs (LOG_VERBOSE, legible (contlen + contrange));
+ if (contrange)
+ logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen));
}
else
logputs (LOG_VERBOSE,
hs->len = 0L;
hs->res = 0;
xfree_null (type);
- CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
- might be more bytes in the body. */
+ /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
+ servers not to send body in response to a HEAD request. If
+ you encounter such a server (more likely a broken CGI), use
+ `--no-http-keep-alive'. */
+ CLOSE_FINISH (sock);
return RETRFINISHED;
}
/* Open the local file. */
- if (!opt.dfp)
+ if (!output_stream)
{
mkalldirs (*hs->local_file);
if (opt.backups)
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
- CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
- might be more bytes in the body. */
+ CLOSE_INVALIDATE (sock);
return FOPENERR;
}
}
- else /* opt.dfp */
- {
- extern int global_download_count;
- fp = opt.dfp;
- /* To ensure that repeated "from scratch" downloads work for -O
- files, we rewind the file pointer, unless restval is
- non-zero. (This works only when -O is used on regular files,
- but it's still a valuable feature.)
-
- However, this loses when more than one URL is specified on
- the command line the second rewinds eradicates the contents
- of the first download. Thus we disable the above trick for
- all the downloads except the very first one.
-
- #### A possible solution to this would be to remember the
- file position in the output document and to seek to that
- position, instead of rewinding.
-
- We don't truncate stdout, since that breaks
- "wget -O - [...] >> foo".
- */
- if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
- {
- /* This will silently fail for streams that don't correspond
- to regular files, but that's OK. */
- rewind (fp);
- /* ftruncate is needed because opt.dfp is opened in append
- mode if opt.always_rest is set. */
- ftruncate (fileno (fp), 0);
- clearerr (fp);
- }
- }
+ else
+ fp = output_stream;
- /* #### This confuses the code that checks for file size. There
- should be some overhead information. */
+ /* #### This confuses the timestamping code that checks for file
+ size. Maybe we should save some additional information? */
if (opt.save_headers)
fwrite (head, 1, strlen (head), fp);
- /* Get the contents of the document. */
- hs->res = fd_read_body (sock, fp, &hs->len, hs->restval,
- (contlen != -1 ? contlen : 0),
- keep_alive, &hs->dltime);
+ /* Download the response body. */
+ flags = 0;
+ if (keep_alive)
+ flags |= rb_read_exactly;
+ if (hs->restval > 0 && contrange == 0)
+ /* If the server ignored our range request, instruct fd_read_body
+ to skip the first RESTVAL bytes of body. */
+ flags |= rb_skip_startpos;
+ hs->len = hs->restval;
+ hs->rd_size = 0;
+ hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
+ hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
+ flags);
if (hs->res >= 0)
CLOSE_FINISH (sock);
error here. Checking the result of fwrite() is not enough --
errors could go unnoticed! */
int flush_res;
- if (!opt.dfp)
+ if (!output_stream)
flush_res = fclose (fp);
else
flush_res = fflush (fp);
if (strchr (u->url, '*'))
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
+ xzero (hstat);
+
/* Determine the local filename. */
if (local_file && *local_file)
hstat.local_file = local_file;
}
/* Reset the counter. */
count = 0;
- *dt = 0 | ACCEPTRANGES;
+ *dt = 0;
/* THE loop */
do
{
if (opt.verbose)
{
char *hurl = url_string (u, 1);
- char tmp[15];
+ char tmp[256];
strcpy (tmp, " ");
if (count > 1)
sprintf (tmp, _("(try:%2d)"), count);
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
- /* Assume no restarting. */
- hstat.restval = 0L;
+
/* Decide whether or not to restart. */
- if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
- /* #### this calls access() and then stat(); could be optimized. */
- && file_exists_p (locf))
- if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
- hstat.restval = st.st_size;
-
- /* In `-c' is used and the file is existing and non-empty,
- refuse to truncate it if the server doesn't support continued
- downloads. */
- hstat.no_truncate = 0;
- if (opt.always_rest && hstat.restval)
- hstat.no_truncate = 1;
+ hstat.restval = 0;
+ if (count > 1)
+ hstat.restval = hstat.len; /* continue where we left off */
+ else if (opt.always_rest
+ && stat (locf, &st) == 0
+ && S_ISREG (st.st_mode))
+ hstat.restval = st.st_size;
/* Decide whether to send the no-cache directive. We send it in
two cases:
const char *fl = NULL;
if (opt.output_document)
{
- if (opt.od_known_regular)
+ if (output_stream_regular)
fl = opt.output_document;
}
else
return RETROK;
}
- tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
+ tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
if (hstat.len == hstat.contlen)
{