trunc: check for `close'-ing the fd errors.

[wget] / src / http.c
diff --git a/src/http.c b/src/http.c

index cd0dba85ea73be5cc8cf230606e1cf485647b3a5..6a2ffe86aa0b5c44d62aaf124d43978e8754d4cb 100644 (file)
--- a/src/http.c
+++ b/src/http.c
@@ -1,6 +1,7 @@
  /* HTTP support.
-   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+   2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+   Inc.
  
  This file is part of GNU Wget.
  
@@ -33,9 +34,7 @@ as that of the covered work.  */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
  #include <assert.h>
  #include <errno.h>
  #include <time.h>
@@ -56,16 +55,19 @@ as that of the covered work.  */
  # include "http-ntlm.h"
  #endif
  #include "cookies.h"
-#ifdef ENABLE_DIGEST
-# include "gen-md5.h"
-#endif
+#include "md5.h"
  #include "convert.h"
  #include "spider.h"
+#include "warc.h"
  
  #ifdef TESTING
  #include "test.h"
  #endif
  
+#ifdef __VMS
+# include "vms.h"
+#endif /* def __VMS */
+
  extern char *version_string;
  
  /* Forward decls. */
@@ -91,6 +93,7 @@ static struct cookie_jar *wget_cookie_jar;
  #define TEXTCSS_S "text/css"
  
  /* Some status code validation macros: */
+#define H_10X(x)        (((x) >= 100) && ((x) < 200))
  #define H_20X(x)        (((x) >= 200) && ((x) < 300))
  #define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
  #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY          \
@@ -142,6 +145,8 @@ struct request {
    int hcount, hcapacity;
  };
  
+extern int numurls;
+
  /* Create a new, empty request.  At least request_set_method must be
     called before the request can be used.  */
  
@@ -316,10 +321,12 @@ request_remove_header (struct request *req, char *name)
    p += A_len;                                   \
  } while (0)
  
-/* Construct the request and write it to FD using fd_write.  */
+/* Construct the request and write it to FD using fd_write.
+   If warc_tmp is set to a file pointer, the request string will
+   also be written to that file. */
  
  static int
-request_send (const struct request *req, int fd)
+request_send (const struct request *req, int fd, FILE *warc_tmp)
  {
    char *request_string, *p;
    int i, size, write_error;
@@ -346,7 +353,7 @@ request_send (const struct request *req, int fd)
  
    APPEND (p, req->method); *p++ = ' ';
    APPEND (p, req->arg);    *p++ = ' ';
-  memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
+  memcpy (p, "HTTP/1.1\r\n", 10); p += 10;
  
    for (i = 0; i < req->hcount; i++)
      {
@@ -370,6 +377,13 @@ request_send (const struct request *req, int fd)
    if (write_error < 0)
      logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
                 fd_errstr (fd));
+  else if (warc_tmp != NULL)
+    {
+      /* Write a copy of the data to the WARC record. */
+      int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp);
+      if (warc_tmp_written != size - 1)
+        return -2;
+    }
    return write_error;
  }
  
@@ -400,18 +414,18 @@ maybe_send_basic_creds (const char *hostname, const char *user,
  
    if (opt.auth_without_challenge)
      {
-      DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+      DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n"));
        do_challenge = true;
      }
    else if (basic_authed_hosts
        && hash_table_contains(basic_authed_hosts, hostname))
      {
-      DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
+      DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
        do_challenge = true;
      }
    else
      {
-      DEBUGP(("Host %s has not issued a general basic challenge.\n",
+      DEBUGP (("Host %s has not issued a general basic challenge.\n",
                quote (hostname)));
      }
    if (do_challenge)
@@ -433,17 +447,19 @@ register_basic_auth_host (const char *hostname)
    if (!hash_table_contains(basic_authed_hosts, hostname))
      {
        hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
-      DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
+      DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname)));
      }
  }
  
  
  /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
     PROMISED_SIZE bytes are sent over the wire -- if the file is
-   longer, read only that much; if the file is shorter, report an error.  */
+   longer, read only that much; if the file is shorter, report an error.
+   If warc_tmp is set to a file pointer, the post data will
+   also be written to that file.  */
  
  static int
-post_file (int sock, const char *file_name, wgint promised_size)
+post_file (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp)
  {
    static char chunk[8192];
    wgint written = 0;
@@ -468,6 +484,16 @@ post_file (int sock, const char *file_name, wgint promised_size)
            fclose (fp);
            return -1;
          }
+      if (warc_tmp != NULL)
+        {
+          /* Write a copy of the data to the WARC record. */
+          int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp);
+          if (warc_tmp_written != towrite)
+            {
+              fclose (fp);
+              return -2;
+            }
+        }
        written += towrite;
      }
    fclose (fp);
@@ -768,7 +794,7 @@ resp_status (const struct response *resp, char **message)
        while (p < end && c_isdigit (*p))
          ++p;
        if (p < end && *p == '.')
-        ++p; 
+        ++p;
        while (p < end && c_isdigit (*p))
          ++p;
      }
@@ -813,7 +839,7 @@ print_response_line(const char *prefix, const char *b, const char *e)
  {
    char *copy;
    BOUNDED_TO_ALLOCA(b, e, copy);
-  logprintf (LOG_ALWAYS, "%s%s\n", prefix, 
+  logprintf (LOG_ALWAYS, "%s%s\n", prefix,
               quotearg_style (escape_quoting_style, copy));
  }
  
@@ -895,29 +921,49 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr,
     mode, the body is displayed for debugging purposes.  */
  
  static bool
-skip_short_body (int fd, wgint contlen)
+skip_short_body (int fd, wgint contlen, bool chunked)
  {
    enum {
      SKIP_SIZE = 512,                /* size of the download buffer */
      SKIP_THRESHOLD = 4096        /* the largest size we read */
    };
+  wgint remaining_chunk_size = 0;
    char dlbuf[SKIP_SIZE + 1];
    dlbuf[SKIP_SIZE] = '\0';        /* so DEBUGP can safely print it */
  
-  /* We shouldn't get here with unknown contlen.  (This will change
-     with HTTP/1.1, which supports "chunked" transfer.)  */
-  assert (contlen != -1);
+  assert (contlen != -1 || contlen);
  
    /* If the body is too large, it makes more sense to simply close the
       connection than to try to read the body.  */
    if (contlen > SKIP_THRESHOLD)
      return false;
  
-  DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
-
-  while (contlen > 0)
+  while (contlen > 0 || chunked)
      {
-      int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
+      int ret;
+      if (chunked)
+        {
+          if (remaining_chunk_size == 0)
+            {
+              char *line = fd_read_line (fd);
+              char *endl;
+              if (line == NULL)
+                break;
+
+              remaining_chunk_size = strtol (line, &endl, 16);
+              if (remaining_chunk_size == 0)
+                {
+                  fd_read_line (fd);
+                  break;
+                }
+            }
+
+          contlen = MIN (remaining_chunk_size, SKIP_SIZE);
+        }
+
+      DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
+
+      ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
        if (ret <= 0)
          {
            /* Don't normally report the error since this is an
@@ -927,6 +973,15 @@ skip_short_body (int fd, wgint contlen)
            return false;
          }
        contlen -= ret;
+
+      if (chunked)
+        {
+          remaining_chunk_size -= ret;
+          if (remaining_chunk_size == 0)
+            if (fd_read_line (fd) == NULL)
+              return false;
+        }
+
        /* Safe even if %.*s bogusly expects terminating \0 because
           we've zero-terminated dlbuf above.  */
        DEBUGP (("%.*s", ret, dlbuf));
@@ -936,6 +991,66 @@ skip_short_body (int fd, wgint contlen)
    return true;
  }
  
+#define NOT_RFC2231 0
+#define RFC2231_NOENCODING 1
+#define RFC2231_ENCODING 2
+
+/* extract_param extracts the parameter name into NAME.
+   However, if the parameter name is in RFC 2231 format, then
+   modify_param_name adjusts NAME by stripping off the trailing
+   characters that are not part of the name but are present to
+   indicate the presence of encoding information in the value
+   or a fragment of a long parameter value.
+*/
+static int
+modify_param_name(param_token *name)
+{
+  /* Returns NOT_RFC2231, RFC2231_NOENCODING or RFC2231_ENCODING and, in
+     the latter two cases, moves name->e back to the first asterisk.
+     Start by locating the first and the last asterisk inside the name
+     span [name->b, name->e).  */
+  const char *first_star = memchr (name->b, '*', name->e - name->b);
+  const char *last_star = memrchr (name->b, '*', name->e - name->b);
+  bool lone_star, trailing_star;
+
+  /* No asterisk at all: an ordinary parameter, NAME stays untouched.  */
+  if (first_star == NULL)
+    return NOT_RFC2231;
+
+  /* Classify before NAME is shortened, since name->e is about to move.  */
+  lone_star = (first_star == last_star);
+  trailing_star = (first_star == name->e - 1);
+
+  /* Everything from the first asterisk onwards is markup, not name.  */
+  name->e = first_star;
+
+  /* A single asterisk followed by more characters yields
+     RFC2231_NOENCODING (presumably a plain continuation fragment such
+     as "name*0").  A trailing asterisk, or a second asterisk anywhere,
+     yields RFC2231_ENCODING.  */
+  if (lone_star && !trailing_star)
+    return RFC2231_NOENCODING;
+
+  return RFC2231_ENCODING;
+}
+
+/* extract_param extracts the parameter value into VALUE.
+   Like modify_param_name, this function modifies VALUE by
+   stripping off the encoding information from the actual value.
+*/
+static void
+modify_param_value (param_token *value, int encoding_type )
+{
+  const char *last_quote;
+
+  /* Only RFC2231_ENCODING values get their encoding prefix stripped.  */
+  if (encoding_type != RFC2231_ENCODING)
+    return;
+  last_quote = memrchr (value->b, '\'', value->e - value->b);
+  if (last_quote != NULL)
+    value->b = last_quote + 1;    /* start just past the last single quote */
+}
+
  /* Extract a parameter from the string (typically an HTTP header) at
     **SOURCE and advance SOURCE to the next parameter.  Return false
     when there are no more parameters to extract.  The name of the
@@ -1007,9 +1122,31 @@ extract_param (const char **source, param_token *name, param_token *value,
        if (*p == separator) ++p;
      }
    *source = p;
+
+  int param_type = modify_param_name(name);
+  if (NOT_RFC2231 != param_type)
+    {
+      modify_param_value(value, param_type);
+    }
    return true;
  }
  
+#undef NOT_RFC2231
+#undef RFC2231_NOENCODING
+#undef RFC2231_ENCODING
+
+/* Append the bytes in [VALUE->b, VALUE->e) to the string *FILENAME, which
+   must be non-NULL; it is regrown with xrealloc and NUL-terminated.  */
+static void
+append_value_to_filename (char **filename, param_token const * const value)
+{
+  size_t original_length = strlen (*filename);   /* measured only once */
+  size_t value_length = value->e - value->b;     /* size_t, not int */
+  *filename = xrealloc (*filename, original_length + value_length + 1);
+  memcpy (*filename + original_length, value->b, value_length);
+  (*filename)[original_length + value_length] = '\0';
+}
+
  #undef MAX
  #define MAX(p, q) ((p) > (q) ? (p) : (q))
  
@@ -1028,47 +1165,46 @@ extract_param (const char **source, param_token *name, param_token *value,
     false.
  
     The file name is stripped of directory components and must not be
-   empty.  */
+   empty.
+
+   Historically, this function returned the filename prefixed with
+   opt.dir_prefix; that logic is now handled by the caller, so new code
+   should take note (changed by crq, Sep 2010).
  
+*/
  static bool
  parse_content_disposition (const char *hdr, char **filename)
  {
    param_token name, value;
+  *filename = NULL;
    while (extract_param (&hdr, &name, &value, ';'))
-    if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
-      {
-        /* Make the file name begin at the last slash or backslash. */
-        const char *last_slash = memrchr (value.b, '/', value.e - value.b);
-        const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
-        if (last_slash && last_bs)
-          value.b = 1 + MAX (last_slash, last_bs);
-        else if (last_slash || last_bs)
-          value.b = 1 + (last_slash ? last_slash : last_bs);
-        if (value.b == value.e)
-          continue;
-        /* Start with the directory prefix, if specified. */
-        if (opt.dir_prefix)
-          {
-            int prefix_length = strlen (opt.dir_prefix);
-            bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
-            int total_length;
-
-            if (add_slash) 
-              ++prefix_length;
-            total_length = prefix_length + (value.e - value.b);            
-            *filename = xmalloc (total_length + 1);
-            strcpy (*filename, opt.dir_prefix);
-            if (add_slash) 
-              (*filename)[prefix_length - 1] = '/';
-            memcpy (*filename + prefix_length, value.b, (value.e - value.b));
-            (*filename)[total_length] = '\0';
-          }
-        else
-          *filename = strdupdelim (value.b, value.e);
-        return true;
-      }
-  return false;
+    {
+      int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
+      if ( isFilename && value.b != NULL)
+        {
+          /* Make the file name begin at the last slash or backslash. */
+          const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+          const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+          if (last_slash && last_bs)
+            value.b = 1 + MAX (last_slash, last_bs);
+          else if (last_slash || last_bs)
+            value.b = 1 + (last_slash ? last_slash : last_bs);
+          if (value.b == value.e)
+            continue;
+
+          if (*filename)
+            append_value_to_filename (filename, &value);
+          else
+            *filename = strdupdelim (value.b, value.e);
+        }
+    }
+
+  if (*filename)
+    return true;
+  else
+    return false;
  }
+
  \f
  /* Persistent connections.  Currently, we cache the most recently used
     connection as persistent, provided that the HTTP server agrees to
@@ -1308,12 +1444,12 @@ struct http_stat
                                     existence after having begun to download
                                     (needed in gethttp for when connection is
                                     interrupted/restarted. */
-  bool timestamp_checked;       /* true if pre-download time-stamping checks 
+  bool timestamp_checked;       /* true if pre-download time-stamping checks
                                   * have already been performed */
    char *orig_file_name;         /* name of file to compare for time-stamping
                                   * (might be != local_file if -K is set) */
    wgint orig_file_size;         /* size of file to compare for time-stamping */
-  time_t orig_file_tstamp;      /* time-stamp of file to compare for 
+  time_t orig_file_tstamp;      /* time-stamp of file to compare for
                                   * time-stamping */
  };
  
@@ -1334,18 +1470,175 @@ free_hstat (struct http_stat *hs)
    hs->error = NULL;
  }
  
+static void
+get_file_flags (const char *filename, int *dt)
+{
+  int found_flags = RETROKF;    /* an existing file counts as retrieved OK */
+
+  logprintf (LOG_VERBOSE, _("\
+File %s already there; not retrieving.\n\n"), quote (filename));
+  /* #### Bogusness alert: the content type is guessed from the name
+     alone -- an "html"/"htm"-like suffix is assumed to mean text/html.  */
+  if (has_html_suffix_p (filename))
+    found_flags |= TEXTHTML;
+
+  *dt |= found_flags;
+}
+
+/* Download the response body from the socket and write it to
+   an output file.  The headers have already been read from the
+   socket.  If WARC is enabled, the response body will also be
+   written to a WARC response record.
+
+   hs, contlen, contrange, chunked_transfer_encoding and url are
+   parameters from the gethttp function.  fp is a pointer to the
+   output file.
+
+   url, warc_timestamp_str, warc_request_uuid, warc_ip, type
+   and statcode will be saved in the headers of the WARC record.
+   The head parameter contains the HTTP headers of the response.
+
+   If fp is NULL and WARC is enabled, the response body will be written
+   only to the WARC file.  If WARC is disabled and fp is a file pointer,
+   the data will be written to the file.  If fp is a file pointer and
+   WARC is enabled, the body will be written to both destinations.
+
+   Returns RETRFINISHED when the transfer ends (read errors included; see
+   hs->res and hs->rderrmsg), otherwise FWRITEERR or a WARC error code.  */
+static int
+read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
+                    wgint contrange, bool chunked_transfer_encoding,
+                    char *url, char *warc_timestamp_str, char *warc_request_uuid,
+                    ip_address *warc_ip, char *type, int statcode, char *head)
+{
+  int warc_payload_offset = 0;  /* where the body starts inside warc_tmp */
+  FILE *warc_tmp = NULL;        /* stays NULL when WARC output is disabled */
+  int warcerr = 0;              /* 0 while WARC setup has not failed */
+
+  if (opt.warc_filename != NULL)
+    {
+      /* Open a temporary file where we can write the response before we
+         add it to the WARC record.  */
+      warc_tmp = warc_tempfile ();
+      if (warc_tmp == NULL)
+        warcerr = WARC_TMP_FOPENERR;
+
+      if (warcerr == 0)
+        {
+          /* We should keep the response headers for the WARC record.  */
+          int head_len = strlen (head);
+          int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp);
+          if (warc_tmp_written != head_len)
+            warcerr = WARC_TMP_FWRITEERR;
+          warc_payload_offset = head_len;
+        }
+
+      if (warcerr != 0)
+        {
+          if (warc_tmp != NULL)
+            fclose (warc_tmp);
+          return warcerr;
+        }
+    }
+
+  if (fp != NULL)
+    {
+      /* This confuses the timestamping code that checks for file size.
+         #### The timestamping code should be smarter about file size.  */
+      if (opt.save_headers && hs->restval == 0)
+        fwrite (head, 1, strlen (head), fp); /* NOTE(review): result unchecked */
+    }
+
+  /* Read the response body.  */
+  int flags = 0;
+  if (contlen != -1)
+    /* If content-length is present, read that much; otherwise, read
+       until EOF.  The HTTP spec doesn't require the server to
+       actually close the connection when it's done sending data. */
+    flags |= rb_read_exactly;
+  if (fp != NULL && hs->restval > 0 && contrange == 0)
+    /* If the server ignored our range request, instruct fd_read_body
+       to skip the first RESTVAL bytes of body.  */
+    flags |= rb_skip_startpos;
+  if (chunked_transfer_encoding)
+    flags |= rb_chunked_transfer_encoding;
+
+  hs->len = hs->restval;
+  hs->rd_size = 0;
+  /* Download the response body and write it to fp.
+     If we are working on a WARC file, we simultaneously write the
+     response body to warc_tmp.  */
+  hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
+                          hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
+                          flags, warc_tmp);
+  if (hs->res >= 0)
+    {
+      if (warc_tmp != NULL)
+        {
+          /* Create a response record and write it to the WARC file.
+             Note: per the WARC standard, the request and response should share
+             the same date header.  We re-use the timestamp of the request.
+             The response record should also refer to the uuid of the request.  */
+          bool r = warc_write_response_record (url, warc_timestamp_str,
+                                               warc_request_uuid, warc_ip,
+                                               warc_tmp, warc_payload_offset,
+                                               type, statcode, hs->newloc);
+
+          /* warc_write_response_record has closed warc_tmp. */
+
+          if (! r)
+            return WARC_ERR;
+        }
+
+      return RETRFINISHED;
+    }
+  
+  if (warc_tmp != NULL)
+    fclose (warc_tmp);
+
+  if (hs->res == -2)
+    {
+      /* Error while writing to the output file (presumably fp; -3 is warc_tmp). */
+      return FWRITEERR;
+    }
+  else if (hs->res == -3)
+    {
+      /* Error while writing to warc_tmp. */
+      return WARC_TMP_FWRITEERR;
+    }
+  else
+    {
+      /* A read error!  It is reported through hs->res and hs->rderrmsg. */
+      hs->rderrmsg = xstrdup (fd_errstr (sock));
+      return RETRFINISHED;
+    }
+}
+
  #define BEGINS_WITH(line, string_constant)                               \
    (!strncasecmp (line, string_constant, sizeof (string_constant) - 1)    \
     && (c_isspace (line[sizeof (string_constant) - 1])                      \
         || !line[sizeof (string_constant) - 1]))
  
+#ifdef __VMS
+#define SET_USER_AGENT(req) do {                                         \
+  if (!opt.useragent)                                                    \
+    request_set_header (req, "User-Agent",                               \
+                        aprintf ("Wget/%s (VMS %s %s)",                  \
+                        version_string, vms_arch(), vms_vers()),         \
+                        rel_value);                                      \
+  else if (*opt.useragent)                                               \
+    request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
+} while (0)
+#else /* def __VMS */
  #define SET_USER_AGENT(req) do {                                         \
    if (!opt.useragent)                                                    \
      request_set_header (req, "User-Agent",                               \
-                        aprintf ("Wget/%s", version_string), rel_value); \
+                        aprintf ("Wget/%s (%s)",                         \
+                        version_string, OS_TYPE),                        \
+                        rel_value);                                      \
    else if (*opt.useragent)                                               \
      request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
  } while (0)
+#endif /* def __VMS [else] */
  
  /* The flags that allow clobbering the file (opening with "wb").
     Defined here to avoid repetition later.  #### This will require
@@ -1364,7 +1657,8 @@ free_hstat (struct http_stat *hs)
     If PROXY is non-NULL, the connection will be made to the proxy
     server, and u->url will be requested.  */
  static uerr_t
-gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+         struct iri *iri, int count)
  {
    struct request *req;
  
@@ -1376,9 +1670,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
    wgint contlen, contrange;
    struct url *conn;
    FILE *fp;
+  int err;
  
    int sock = -1;
-  int flags;
  
    /* Set to 1 when the authorization has already been sent and should
       not be tried again. */
@@ -1404,19 +1698,24 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
    char hdrval[256];
    char *message;
  
+  /* Declare WARC variables. */
+  bool warc_enabled = (opt.warc_filename != NULL);
+  FILE *warc_tmp = NULL;
+  char warc_timestamp_str [21];
+  char warc_request_uuid [48];
+  ip_address *warc_ip = NULL;
+  long int warc_payload_offset = -1;
+
    /* Whether this connection will be kept alive after the HTTP request
       is done. */
    bool keep_alive;
  
-  /* Whether keep-alive should be inhibited.
+  /* Is the server using the chunked transfer encoding?  */
+  bool chunked_transfer_encoding = false;
  
-     RFC 2068 requests that 1.0 clients not send keep-alive requests
-     to proxies.  This is because many 1.0 proxies do not interpret
-     the Connection header and transfer it to the remote server,
-     causing it to not close the connection and leave both the proxy
-     and the client hanging.  */
+  /* Whether keep-alive should be inhibited.  */
    bool inhibit_keep_alive =
-    !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
+    !opt.http_keep_alive || opt.ignore_length;
  
    /* Headers sent when using POST. */
    wgint post_data_size = 0;
@@ -1479,7 +1778,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
  
    request_set_header (req, "Referer", (char *) hs->referer, rel_none);
    if (*dt & SEND_NOCACHE)
-    request_set_header (req, "Pragma", "no-cache", rel_none);
+    {
+      /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms...  */
+      request_set_header (req, "Cache-Control", "no-cache, must-revalidate", rel_none);
+
+      /* ... but some HTTP/1.0 caches don't implement Cache-Control.  */
+      request_set_header (req, "Pragma", "no-cache", rel_none);
+    }
    if (hs->restval)
      request_set_header (req, "Range",
                          aprintf ("bytes=%s-",
@@ -1526,20 +1831,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
                          rel_value);
    }
  
-  if (!inhibit_keep_alive)
-    request_set_header (req, "Connection", "Keep-Alive", rel_none);
-
-  if (opt.cookies)
-    request_set_header (req, "Cookie",
-                        cookie_header (wget_cookie_jar,
-                                       u->host, u->port, u->path,
-#ifdef HAVE_SSL
-                                       u->scheme == SCHEME_HTTPS
-#else
-                                       0
-#endif
-                                       ),
-                        rel_value);
+  if (inhibit_keep_alive)
+    request_set_header (req, "Connection", "Close", rel_none);
+  else
+    {
+      if (proxy == NULL)
+        request_set_header (req, "Connection", "Keep-Alive", rel_none);
+      else
+        {
+          request_set_header (req, "Connection", "Close", rel_none);
+          request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
+        }
+    }
  
    if (opt.post_data || opt.post_file_name)
      {
@@ -1562,6 +1865,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
                            rel_value);
      }
  
+ retry_with_auth:
+  /* We need to come back here when the initial attempt to retrieve
+     without authorization header fails.  (Expected to happen at least
+     for the Digest authorization scheme.)  */
+
+  if (opt.cookies)
+    request_set_header (req, "Cookie",
+                        cookie_header (wget_cookie_jar,
+                                       u->host, u->port, u->path,
+#ifdef HAVE_SSL
+                                       u->scheme == SCHEME_HTTPS
+#else
+                                       0
+#endif
+                                       ),
+                        rel_value);
+
    /* Add the user headers. */
    if (opt.user_headers)
      {
@@ -1570,11 +1890,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
          request_set_user_header (req, opt.user_headers[i]);
      }
  
- retry_with_auth:
-  /* We need to come back here when the initial attempt to retrieve
-     without authorization header fails.  (Expected to happen at least
-     for the Digest authorization scheme.)  */
-
    proxyauth = NULL;
    if (proxy)
      {
@@ -1610,11 +1925,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
          request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
      }
  
-  keep_alive = false;
+  keep_alive = true;
  
    /* Establish the connection.  */
  
-  if (!inhibit_keep_alive)
+  if (inhibit_keep_alive)
+    keep_alive = false;
+  else
      {
        /* Look for a persistent connection to target host, unless a
           proxy is used.  The exception is when SSL is in use, in which
@@ -1637,7 +1954,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
            sock = pconn.socket;
            using_ssl = pconn.ssl;
            logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
-                     quotearg_style (escape_quoting_style, pconn.host), 
+                     quotearg_style (escape_quoting_style, pconn.host),
                       pconn.port);
            DEBUGP (("Reusing fd %d.\n", sock));
            if (pconn.authorized)
@@ -1694,7 +2011,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
               that the contents of Host would be exactly the same as
               the contents of CONNECT.  */
  
-          write_error = request_send (connreq, sock);
+          write_error = request_send (connreq, sock, 0);
            request_free (connreq);
            if (write_error < 0)
              {
@@ -1720,6 +2037,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
  
            resp = resp_new (head);
            statcode = resp_status (resp, &message);
+          if (statcode < 0)
+            {
+              char *tms = datetime_str (time (NULL));
+              logprintf (LOG_VERBOSE, "%d\n", statcode);
+              logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+                         quotearg_style (escape_quoting_style,
+                                         _("Malformed status line")));
+              xfree (head);
+              return HERR;
+            }
            hs->message = xstrdup (message);
            resp_free (resp);
            xfree (head);
@@ -1741,18 +2068,41 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
  
        if (conn->scheme == SCHEME_HTTPS)
          {
-          if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
+          if (!ssl_connect_wget (sock))
              {
                fd_close (sock);
                return CONSSLERR;
              }
+          else if (!ssl_check_certificate (sock, u->host))
+            {
+              fd_close (sock);
+              return VERIFCERTERR;
+            }
            using_ssl = true;
          }
  #endif /* HAVE_SSL */
      }
  
+  /* Open the temporary file where we will write the request. */
+  if (warc_enabled)
+    {
+      warc_tmp = warc_tempfile ();
+      if (warc_tmp == NULL)
+        {
+          CLOSE_INVALIDATE (sock);
+          request_free (req);
+          return WARC_TMP_FOPENERR;
+        }
+
+      if (! proxy)
+        {
+          warc_ip = (ip_address *) alloca (sizeof (ip_address));
+          socket_ip_address (sock, warc_ip, ENDPOINT_PEER);
+        }
+    }
+
    /* Send the request to server.  */
-  write_error = request_send (req, sock);
+  write_error = request_send (req, sock, warc_tmp);
  
    if (write_error >= 0)
      {
@@ -1760,16 +2110,39 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
          {
            DEBUGP (("[POST data: %s]\n", opt.post_data));
            write_error = fd_write (sock, opt.post_data, post_data_size, -1);
+          if (write_error >= 0 && warc_tmp != NULL)
+            {
+              /* Remember end of headers / start of payload. */
+              warc_payload_offset = ftell (warc_tmp);
+
+              /* Write a copy of the data to the WARC record. */
+              int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp);
+              if (warc_tmp_written != post_data_size)
+                write_error = -2;
+            }
          }
        else if (opt.post_file_name && post_data_size != 0)
-        write_error = post_file (sock, opt.post_file_name, post_data_size);
+        {
+          if (warc_tmp != NULL)
+            /* Remember end of headers / start of payload. */
+            warc_payload_offset = ftell (warc_tmp);
+
+          write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp);
+        }
      }
  
    if (write_error < 0)
      {
        CLOSE_INVALIDATE (sock);
        request_free (req);
-      return WRITEFAILED;
+
+      if (warc_tmp != NULL)
+        fclose (warc_tmp);
+
+      if (write_error == -2)
+        return WARC_TMP_FWRITEERR;
+      else
+        return WRITEFAILED;
      }
    logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
               proxy ? "Proxy" : "HTTP");
@@ -1777,6 +2150,30 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
    contrange = 0;
    *dt &= ~RETROKF;
  
+
+  if (warc_enabled)
+    {
+      bool warc_result;
+      /* Generate a timestamp and uuid for this request. */
+      warc_timestamp (warc_timestamp_str);
+      warc_uuid_str (warc_request_uuid);
+
+      /* Create a request record and store it in the WARC file. */
+      warc_result = warc_write_request_record (u->url, warc_timestamp_str,
+                                               warc_request_uuid, warc_ip,
+                                               warc_tmp, warc_payload_offset);
+      if (! warc_result)
+        {
+          CLOSE_INVALIDATE (sock);
+          request_free (req);
+          return WARC_ERR;
+        }
+
+      /* warc_write_request_record has also closed warc_tmp. */
+    }
+
+
+read_header:
    head = read_http_response_head (sock);
    if (!head)
      {
@@ -1803,6 +2200,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
    /* Check for status line.  */
    message = NULL;
    statcode = resp_status (resp, &message);
+  if (statcode < 0)
+    {
+      char *tms = datetime_str (time (NULL));
+      logprintf (LOG_VERBOSE, "%d\n", statcode);
+      logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+                 quotearg_style (escape_quoting_style,
+                                 _("Malformed status line")));
+      CLOSE_INVALIDATE (sock);
+      request_free (req);
+      xfree (head);
+      return HERR;
+    }
+
+  if (H_10X (statcode))
+    {
+      DEBUGP (("Ignoring response\n"));
+      xfree (head);
+      goto read_header;
+    }
+
    hs->message = xstrdup (message);
    if (!opt.server_response)
      logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
@@ -1813,149 +2230,64 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
        print_server_response (resp, "  ");
      }
  
-  /* Determine the local filename if needed. Notice that if -O is used 
-   * hstat.local_file is set by http_loop to the argument of -O. */
-  if (!hs->local_file)
+  if (!opt.ignore_length
+      && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
      {
-      /* Honor Content-Disposition whether possible. */
-      if (!opt.content_disposition
-          || !resp_header_copy (resp, "Content-Disposition", 
-                                hdrval, sizeof (hdrval))
-          || !parse_content_disposition (hdrval, &hs->local_file))
+      wgint parsed;
+      errno = 0;
+      parsed = str_to_wgint (hdrval, NULL, 10);
+      if (parsed == WGINT_MAX && errno == ERANGE)
          {
-          /* The Content-Disposition header is missing or broken. 
-           * Choose unique file name according to given URL. */
-          hs->local_file = url_file_name (u);
+          /* Out of range.
+             #### If Content-Length is out of range, it most likely
+             means that the file is larger than 2G and that we're
+             compiled without LFS.  In that case we should probably
+             refuse to even attempt to download the file.  */
+          contlen = -1;
+        }
+      else if (parsed < 0)
+        {
+          /* Negative Content-Length; nonsensical, so we can't
+             assume any information about the content to receive. */
+          contlen = -1;
          }
+      else
+        contlen = parsed;
      }
-  
-  /* TODO: perform this check only once. */
-  if (!hs->existence_checked && file_exists_p (hs->local_file))
+
+  /* Check for keep-alive related responses. */
+  if (!inhibit_keep_alive && contlen != -1)
      {
-      if (opt.noclobber && !opt.output_document)
+      if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
          {
-          /* If opt.noclobber is turned on and file already exists, do not
-             retrieve the file. But if the output_document was given, then this
-             test was already done and the file didn't exist. Hence the !opt.output_document */
-          logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"), quote (hs->local_file));
-          /* If the file is there, we suppose it's retrieved OK.  */
-          *dt |= RETROKF;
-
-          /* #### Bogusness alert.  */
-          /* If its suffix is "html" or "htm" or similar, assume text/html.  */
-          if (has_html_suffix_p (hs->local_file))
-            *dt |= TEXTHTML;
-
-          return RETRUNNEEDED;
-        }
-      else if (!ALLOW_CLOBBER)
-        {
-          char *unique = unique_name (hs->local_file, true);
-          if (unique != hs->local_file)
-            xfree (hs->local_file);
-          hs->local_file = unique;
+          if (0 == strcasecmp (hdrval, "Close"))
+            keep_alive = false;
          }
      }
-  hs->existence_checked = true;
  
-  /* Support timestamping */
-  /* TODO: move this code out of gethttp. */
-  if (opt.timestamping && !hs->timestamp_checked)
-    {
-      size_t filename_len = strlen (hs->local_file);
-      char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
-      bool local_dot_orig_file_exists = false;
-      char *local_filename = NULL;
-      struct_stat st;
+  chunked_transfer_encoding = false;
+  if (resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval))
+      && 0 == strcasecmp (hdrval, "chunked"))
+    chunked_transfer_encoding = true;
  
-      if (opt.backup_converted)
-        /* If -K is specified, we'll act on the assumption that it was specified
-           last time these files were downloaded as well, and instead of just
-           comparing local file X against server file X, we'll compare local
-           file X.orig (if extant, else X) against server file X.  If -K
-           _wasn't_ specified last time, or the server contains files called
-           *.orig, -N will be back to not operating correctly with -k. */
-        {
-          /* Would a single s[n]printf() call be faster?  --dan
-
-             Definitely not.  sprintf() is horribly slow.  It's a
-             different question whether the difference between the two
-             affects a program.  Usually I'd say "no", but at one
-             point I profiled Wget, and found that a measurable and
-             non-negligible amount of time was lost calling sprintf()
-             in url.c.  Replacing sprintf with inline calls to
-             strcpy() and number_to_string() made a difference.
-             --hniksic */
-          memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
-          memcpy (filename_plus_orig_suffix + filename_len,
-                  ".orig", sizeof (".orig"));
-
-          /* Try to stat() the .orig file. */
-          if (stat (filename_plus_orig_suffix, &st) == 0)
-            {
-              local_dot_orig_file_exists = true;
-              local_filename = filename_plus_orig_suffix;
-            }
-        }      
-
-      if (!local_dot_orig_file_exists)
-        /* Couldn't stat() <file>.orig, so try to stat() <file>. */
-        if (stat (hs->local_file, &st) == 0)
-          local_filename = hs->local_file;
-
-      if (local_filename != NULL)
-        /* There was a local file, so we'll check later to see if the version
-           the server has is the same version we already have, allowing us to
-           skip a download. */
-        {
-          hs->orig_file_name = xstrdup (local_filename);
-          hs->orig_file_size = st.st_size;
-          hs->orig_file_tstamp = st.st_mtime;
-#ifdef WINDOWS
-          /* Modification time granularity is 2 seconds for Windows, so
-             increase local time by 1 second for later comparison. */
-          ++hs->orig_file_tstamp;
-#endif
-        }
-    }
-
-  if (!opt.ignore_length
-      && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
+  /* Handle (possibly multiple instances of) the Set-Cookie header. */
+  if (opt.cookies)
      {
-      wgint parsed;
-      errno = 0;
-      parsed = str_to_wgint (hdrval, NULL, 10);
-      if (parsed == WGINT_MAX && errno == ERANGE)
-        {
-          /* Out of range.
-             #### If Content-Length is out of range, it most likely
-             means that the file is larger than 2G and that we're
-             compiled without LFS.  In that case we should probably
-             refuse to even attempt to download the file.  */
-          contlen = -1;
-        }
-      else if (parsed < 0)
+      int scpos;
+      const char *scbeg, *scend;
+      /* The jar should have been created by now. */
+      assert (wget_cookie_jar != NULL);
+      for (scpos = 0;
+           (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
+                                        &scbeg, &scend)) != -1;
+           ++scpos)
          {
-          /* Negative Content-Length; nonsensical, so we can't
-             assume any information about the content to receive. */
-          contlen = -1;
+          char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
+          cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
+                                    u->path, set_cookie);
          }
-      else
-        contlen = parsed;
      }
  
-  /* Check for keep-alive related responses. */
-  if (!inhibit_keep_alive && contlen != -1)
-    {
-      if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
-        keep_alive = true;
-      else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
-        {
-          if (0 == strcasecmp (hdrval, "Keep-Alive"))
-            keep_alive = true;
-        }
-    }
    if (keep_alive)
      /* The server has promised that it will not close the connection
         when we're done.  This means that we can register it.  */
@@ -1964,10 +2296,42 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
    if (statcode == HTTP_STATUS_UNAUTHORIZED)
      {
        /* Authorization is required.  */
-      if (keep_alive && !head_only && skip_short_body (sock, contlen))
-        CLOSE_FINISH (sock);
+
+      /* Normally we are not interested in the response body.
+         But if we are writing a WARC file we are: we like to keep everything.  */
+      if (warc_enabled)
+        {
+          int err;
+          type = resp_header_strdup (resp, "Content-Type");
+          err = read_response_body (hs, sock, NULL, contlen, 0,
+                                    chunked_transfer_encoding,
+                                    u->url, warc_timestamp_str,
+                                    warc_request_uuid, warc_ip, type,
+                                    statcode, head);
+          xfree_null (type);
+
+          if (err != RETRFINISHED || hs->res < 0)
+            {
+              CLOSE_INVALIDATE (sock);
+              request_free (req);
+              xfree_null (message);
+              resp_free (resp);
+              xfree (head);
+              return err;
+            }
+          else
+            CLOSE_FINISH (sock);
+        }
        else
-        CLOSE_INVALIDATE (sock);
+        {
+          /* Since WARC is disabled, we are not interested in the response body.  */
+          if (keep_alive && !head_only
+              && skip_short_body (sock, contlen, chunked_transfer_encoding))
+            CLOSE_FINISH (sock);
+          else
+            CLOSE_INVALIDATE (sock);
+        }
+
        pconn.authorized = false;
        if (!auth_finished && (user && passwd))
          {
@@ -2014,6 +2378,9 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
                    register_basic_auth_host (u->host);
                  }
                xfree (pth);
+              xfree_null (message);
+              resp_free (resp);
+              xfree (head);
                goto retry_with_auth;
              }
            else
@@ -2024,6 +2391,9 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
          }
        logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
        request_free (req);
+      xfree_null (message);
+      resp_free (resp);
+      xfree (head);
        return AUTHFAILED;
      }
    else /* statcode != HTTP_STATUS_UNAUTHORIZED */
@@ -2032,6 +2402,115 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
        if (ntlm_seen)
          pconn.authorized = true;
      }
+
+  /* Determine the local filename if needed. Notice that if -O is used
+   * hstat.local_file is set by http_loop to the argument of -O. */
+  if (!hs->local_file)
+    {
+      char *local_file = NULL;
+
+      /* Honor Content-Disposition whenever possible. */
+      if (!opt.content_disposition
+          || !resp_header_copy (resp, "Content-Disposition",
+                                hdrval, sizeof (hdrval))
+          || !parse_content_disposition (hdrval, &local_file))
+        {
+          /* The Content-Disposition header is missing or broken.
+           * Choose unique file name according to given URL. */
+          hs->local_file = url_file_name (u, NULL);
+        }
+      else
+        {
+          DEBUGP (("Parsed filename from Content-Disposition: %s\n",
+                  local_file));
+          hs->local_file = url_file_name (u, local_file);
+        }
+    }
+
+  /* TODO: perform this check only once. */
+  if (!hs->existence_checked && file_exists_p (hs->local_file))
+    {
+      if (opt.noclobber && !opt.output_document)
+        {
+          /* If opt.noclobber is turned on and file already exists, do not
+             retrieve the file. But if the output_document was given, then this
+             test was already done and the file didn't exist. Hence the !opt.output_document */
+          get_file_flags (hs->local_file, dt);
+          xfree (head);
+          xfree_null (message);
+          return RETRUNNEEDED;
+        }
+      else if (!ALLOW_CLOBBER)
+        {
+          char *unique = unique_name (hs->local_file, true);
+          if (unique != hs->local_file)
+            xfree (hs->local_file);
+          hs->local_file = unique;
+        }
+    }
+  hs->existence_checked = true;
+
+  /* Support timestamping */
+  /* TODO: move this code out of gethttp. */
+  if (opt.timestamping && !hs->timestamp_checked)
+    {
+      size_t filename_len = strlen (hs->local_file);
+      char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
+      bool local_dot_orig_file_exists = false;
+      char *local_filename = NULL;
+      struct_stat st;
+
+      if (opt.backup_converted)
+        /* If -K is specified, we'll act on the assumption that it was specified
+           last time these files were downloaded as well, and instead of just
+           comparing local file X against server file X, we'll compare local
+           file X.orig (if extant, else X) against server file X.  If -K
+           _wasn't_ specified last time, or the server contains files called
+           *.orig, -N will be back to not operating correctly with -k. */
+        {
+          /* Would a single s[n]printf() call be faster?  --dan
+
+             Definitely not.  sprintf() is horribly slow.  It's a
+             different question whether the difference between the two
+             affects a program.  Usually I'd say "no", but at one
+             point I profiled Wget, and found that a measurable and
+             non-negligible amount of time was lost calling sprintf()
+             in url.c.  Replacing sprintf with inline calls to
+             strcpy() and number_to_string() made a difference.
+             --hniksic */
+          memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
+          memcpy (filename_plus_orig_suffix + filename_len,
+                  ORIG_SFX, sizeof (ORIG_SFX));
+
+          /* Try to stat() the .orig file. */
+          if (stat (filename_plus_orig_suffix, &st) == 0)
+            {
+              local_dot_orig_file_exists = true;
+              local_filename = filename_plus_orig_suffix;
+            }
+        }
+
+      if (!local_dot_orig_file_exists)
+        /* Couldn't stat() <file>.orig, so try to stat() <file>. */
+        if (stat (hs->local_file, &st) == 0)
+          local_filename = hs->local_file;
+
+      if (local_filename != NULL)
+        /* There was a local file, so we'll check later to see if the version
+           the server has is the same version we already have, allowing us to
+           skip a download. */
+        {
+          hs->orig_file_name = xstrdup (local_filename);
+          hs->orig_file_size = st.st_size;
+          hs->orig_file_tstamp = st.st_mtime;
+#ifdef WINDOWS
+          /* Modification time granularity is 2 seconds for Windows, so
+             increase local time by 1 second for later comparison. */
+          ++hs->orig_file_tstamp;
+#endif
+        }
+    }
+
    request_free (req);
  
    hs->statcode = statcode;
@@ -2049,32 +2528,25 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
        char *tmp = strchr (type, ';');
        if (tmp)
          {
+          /* sXXXav: only needed if IRI support is enabled */
+          char *tmp2 = tmp + 1;
+
            while (tmp > type && c_isspace (tmp[-1]))
              --tmp;
            *tmp = '\0';
+
+          /* Try to get remote encoding if needed */
+          if (opt.enable_iri && !opt.encoding_remote)
+            {
+              tmp = parse_charset (tmp2);
+              if (tmp)
+                set_content_encoding (iri, tmp);
+            }
          }
      }
    hs->newloc = resp_header_strdup (resp, "Location");
    hs->remote_time = resp_header_strdup (resp, "Last-Modified");
  
-  /* Handle (possibly multiple instances of) the Set-Cookie header. */
-  if (opt.cookies)
-    {
-      int scpos;
-      const char *scbeg, *scend;
-      /* The jar should have been created by now. */
-      assert (wget_cookie_jar != NULL);
-      for (scpos = 0;
-           (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
-                                        &scbeg, &scend)) != -1;
-           ++scpos)
-        {
-          char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
-          cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
-                                    u->path, set_cookie);
-        }
-    }
-
    if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
      {
        wgint first_byte_pos, last_byte_pos, entity_length;
@@ -2107,11 +2579,53 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
                       _("Location: %s%s\n"),
                       hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
                       hs->newloc ? _(" [following]") : "");
-          if (keep_alive && !head_only && skip_short_body (sock, contlen))
-            CLOSE_FINISH (sock);
+ 
+          /* In case the caller cares to look...  */
+          hs->len = 0;
+          hs->res = 0;
+          hs->restval = 0;
+
+          /* Normally we are not interested in the response body of a redirect.
+             But if we are writing a WARC file we are: we like to keep everything.  */
+          if (warc_enabled)
+            {
+              int err = read_response_body (hs, sock, NULL, contlen, 0,
+                                            chunked_transfer_encoding,
+                                            u->url, warc_timestamp_str,
+                                            warc_request_uuid, warc_ip, type,
+                                            statcode, head);
+
+              if (err != RETRFINISHED || hs->res < 0)
+                {
+                  CLOSE_INVALIDATE (sock);
+                  xfree_null (type);
+                  xfree (head);
+                  return err;
+                }
+              else
+                CLOSE_FINISH (sock);
+            }
            else
-            CLOSE_INVALIDATE (sock);
+            {
+              /* Since WARC is disabled, we are not interested in the response body.  */
+              if (keep_alive && !head_only
+                  && skip_short_body (sock, contlen, chunked_transfer_encoding))
+                CLOSE_FINISH (sock);
+              else
+                CLOSE_INVALIDATE (sock);
+            }
+
            xfree_null (type);
+          xfree (head);
+          /* From RFC2616: The status codes 303 and 307 have
+             been added for servers that wish to make unambiguously
+             clear which kind of reaction is expected of the client.
+             
+             A 307 should be redirected using the same method,
+             in other words, a POST should be preserved and not
+             converted to a GET in that case. */
+          if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
+            return NEWLOCATION_KEEP_POST;
            return NEWLOCATION;
          }
      }
@@ -2121,7 +2635,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
       content-type.  */
    if (!type ||
          0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
-        0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))    
+        0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
      *dt |= TEXTHTML;
    else
      *dt &= ~TEXTHTML;
@@ -2132,10 +2646,10 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
    else
      *dt &= ~TEXTCSS;
  
-  if (opt.html_extension)
+  if (opt.adjust_extension)
      {
        if (*dt & TEXTHTML)
-        /* -E / --html-extension / html_extension = on was specified,
+        /* -E / --adjust-extension / adjust_extension = on was specified,
             and this is a text/html file.  If some case-insensitive
             variation on ".htm[l]" isn't already the file's suffix,
             tack on ".html". */
@@ -2149,9 +2663,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
      }
  
    if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
-      || (hs->restval > 0 && statcode == HTTP_STATUS_OK
-          && contrange == 0 && hs->restval >= contlen)
-     )
+      || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
+          && contrange == 0 && contlen >= 0 && hs->restval >= contlen))
      {
        /* If `-c' is in use and the file has been fully downloaded (or
           the remote file has shrunk), Wget effectively requests bytes
@@ -2167,6 +2680,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
        xfree_null (type);
        CLOSE_INVALIDATE (sock);        /* would be CLOSE_FINISH, but there
                                     might be more bytes in the body. */
+      xfree (head);
        return RETRUNNEEDED;
      }
    if ((contrange != 0 && contrange != hs->restval)
@@ -2176,6 +2690,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
           Bail out.  */
        xfree_null (type);
        CLOSE_INVALIDATE (sock);
+      xfree (head);
        return RANGEERR;
      }
    if (contlen == -1)
@@ -2217,31 +2732,68 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
              logputs (LOG_VERBOSE, "\n");
          }
      }
-  xfree_null (type);
-  type = NULL;                        /* We don't need it any more.  */
  
    /* Return if we have no intention of further downloading.  */
-  if (!(*dt & RETROKF) || head_only)
+  if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
      {
        /* In case the caller cares to look...  */
        hs->len = 0;
        hs->res = 0;
-      xfree_null (type);
-      if (head_only)
-        /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
-           servers not to send body in response to a HEAD request, and
-           those that do will likely be caught by test_socket_open.
-           If not, they can be worked around using
-           `--no-http-keep-alive'.  */
-        CLOSE_FINISH (sock);
-      else if (keep_alive && skip_short_body (sock, contlen))
-        /* Successfully skipped the body; also keep using the socket. */
-        CLOSE_FINISH (sock);
+      hs->restval = 0;
+
+      /* Normally we are not interested in the response body of an error response.
+         But if we are writing a WARC file we are: we like to keep everything.  */
+      if (warc_enabled)
+        {
+          int err = read_response_body (hs, sock, NULL, contlen, 0,
+                                        chunked_transfer_encoding,
+                                        u->url, warc_timestamp_str,
+                                        warc_request_uuid, warc_ip, type,
+                                        statcode, head);
+
+          if (err != RETRFINISHED || hs->res < 0)
+            {
+              CLOSE_INVALIDATE (sock);
+              xfree (head);
+              xfree_null (type);
+              return err;
+            }
+          else
+            CLOSE_FINISH (sock);
+        }
        else
-        CLOSE_INVALIDATE (sock);
+        {
+          /* Since WARC is disabled, we are not interested in the response body.  */
+          if (head_only)
+            /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
+               servers not to send body in response to a HEAD request, and
+               those that do will likely be caught by test_socket_open.
+               If not, they can be worked around using
+               `--no-http-keep-alive'.  */
+            CLOSE_FINISH (sock);
+          else if (keep_alive
+                   && skip_short_body (sock, contlen, chunked_transfer_encoding))
+            /* Successfully skipped the body; also keep using the socket. */
+            CLOSE_FINISH (sock);
+          else
+            CLOSE_INVALIDATE (sock);
+        }
+
+      xfree (head);
+      xfree_null (type);
        return RETRFINISHED;
      }
  
+/* 2005-06-17 SMS.
+   For VMS, define common fopen() optional arguments.
+*/
+#ifdef __VMS
+# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+# define FOPEN_BIN_FLAG 3
+#else /* def __VMS */
+# define FOPEN_BIN_FLAG true
+#endif /* def __VMS [else] */
+
    /* Open the local file.  */
    if (!output_stream)
      {
@@ -2249,12 +2801,44 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
        if (opt.backups)
          rotate_backups (hs->local_file);
        if (hs->restval)
-        fp = fopen (hs->local_file, "ab");
-      else if (ALLOW_CLOBBER)
-        fp = fopen (hs->local_file, "wb");
+        {
+#ifdef __VMS
+          int open_id;
+
+          open_id = 21;
+          fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+          fp = fopen (hs->local_file, "ab");
+#endif /* def __VMS [else] */
+        }
+      else if (ALLOW_CLOBBER || count > 0)
+        {
+         if (opt.unlink && file_exists_p (hs->local_file))
+           {
+             int res = unlink (hs->local_file);
+             if (res < 0)
+               {
+                 logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
+                            strerror (errno));
+                 CLOSE_INVALIDATE (sock);
+                 xfree (head);
+      xfree_null (type);
+                 return UNLINKERR;
+               }
+           }
+
+#ifdef __VMS
+          int open_id;
+
+          open_id = 22;
+          fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+          fp = fopen (hs->local_file, "wb");
+#endif /* def __VMS [else] */
+        }
        else
          {
-          fp = fopen_excl (hs->local_file, true);
+          fp = fopen_excl (hs->local_file, FOPEN_BIN_FLAG);
            if (!fp && errno == EEXIST)
              {
                /* We cannot just invent a new name and use it (which is
@@ -2265,6 +2849,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
                           _("%s has sprung into existence.\n"),
                           hs->local_file);
                CLOSE_INVALIDATE (sock);
+              xfree (head);
+              xfree_null (type);
                return FOPEN_EXCL_ERR;
              }
          }
@@ -2272,6 +2858,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
          {
            logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
            CLOSE_INVALIDATE (sock);
+          xfree (head);
+          xfree_null (type);
            return FOPENERR;
          }
      }
@@ -2281,56 +2869,38 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
    /* Print fetch message, if opt.verbose.  */
    if (opt.verbose)
      {
-      logprintf (LOG_NOTQUIET, _("Saving to: %s\n"), 
+      logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
                   HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
      }
-    
-  /* This confuses the timestamping code that checks for file size.
-     #### The timestamping code should be smarter about file size.  */
-  if (opt.save_headers && hs->restval == 0)
-    fwrite (head, 1, strlen (head), fp);
+
+
+  err = read_response_body (hs, sock, fp, contlen, contrange,
+                            chunked_transfer_encoding,
+                            u->url, warc_timestamp_str,
+                            warc_request_uuid, warc_ip, type,
+                            statcode, head);
  
    /* Now we no longer need to store the response header. */
    xfree (head);
-
-  /* Download the request body.  */
-  flags = 0;
-  if (contlen != -1)
-    /* If content-length is present, read that much; otherwise, read
-       until EOF.  The HTTP spec doesn't require the server to
-       actually close the connection when it's done sending data. */
-    flags |= rb_read_exactly;
-  if (hs->restval > 0 && contrange == 0)
-    /* If the server ignored our range request, instruct fd_read_body
-       to skip the first RESTVAL bytes of body.  */
-    flags |= rb_skip_startpos;
-  hs->len = hs->restval;
-  hs->rd_size = 0;
-  hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
-                          hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
-                          flags);
+  xfree_null (type);
  
    if (hs->res >= 0)
      CLOSE_FINISH (sock);
    else
-    {
-      if (hs->res < 0)
-        hs->rderrmsg = xstrdup (fd_errstr (sock));
-      CLOSE_INVALIDATE (sock);
-    }
+    CLOSE_INVALIDATE (sock);
  
    if (!output_stream)
      fclose (fp);
-  if (hs->res == -2)
-    return FWRITEERR;
-  return RETRFINISHED;
+
+  return err;
  }
  
  /* The genuine HTTP loop!  This is the part where the retrieval is
     retried, and retried, and retried, and...  */
  uerr_t
-http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
-           int *dt, struct url *proxy)
+http_loop (struct url *u, struct url *original_url, char **newloc,
+           char **local_file, const char *referer, int *dt, struct url *proxy,
+           struct iri *iri)
  {
    int count;
    bool got_head = false;         /* used for time-stamping and filename detection */
@@ -2341,16 +2911,24 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
    uerr_t err, ret = TRYLIMEXC;
    time_t tmr = -1;               /* remote time-stamp */
    struct http_stat hstat;        /* HTTP status */
-  struct_stat st;  
+  struct_stat st;
    bool send_head_first = true;
+  char *file_name;
+  bool force_full_retrieve = false;
+
+
+  /* If we are writing to a WARC file: always retrieve the whole file. */
+  if (opt.warc_filename != NULL)
+    force_full_retrieve = true;
+
  
    /* Assert that no value for *LOCAL_FILE was passed. */
    assert (local_file == NULL || *local_file == NULL);
-  
+
    /* Set LOCAL_FILE parameter. */
    if (local_file && opt.output_document)
      *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-  
+
    /* Reset NEWLOC parameter. */
    *newloc = NULL;
  
@@ -2358,7 +2936,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
       here so that we don't go through the hoops if we're just using
       FTP or whatever. */
    if (opt.cookies)
-    load_cookies();
+    load_cookies ();
  
    /* Warn on (likely bogus) wildcard usage in HTTP. */
    if (opt.ftp_glob && has_wildcards_p (u->path))
@@ -2375,61 +2953,49 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
      }
    else if (!opt.content_disposition)
      {
-      hstat.local_file = url_file_name (u);
+      hstat.local_file =
+        url_file_name (opt.trustservernames ? u : original_url, NULL);
        got_name = true;
      }
  
-  /* TODO: Ick! This code is now in both gethttp and http_loop, and is
-   * screaming for some refactoring. */
    if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
      {
        /* If opt.noclobber is turned on and file already exists, do not
           retrieve the file. But if the output_document was given, then this
           test was already done and the file didn't exist. Hence the !opt.output_document */
-      logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"), 
-                 quote (hstat.local_file));
-      /* If the file is there, we suppose it's retrieved OK.  */
-      *dt |= RETROKF;
-
-      /* #### Bogusness alert.  */
-      /* If its suffix is "html" or "htm" or similar, assume text/html.  */
-      if (has_html_suffix_p (hstat.local_file))
-        *dt |= TEXTHTML;
-
+      get_file_flags (hstat.local_file, dt);
        ret = RETROK;
        goto exit;
      }
  
    /* Reset the counter. */
    count = 0;
-  
+
    /* Reset the document type. */
    *dt = 0;
-  
-  /* Skip preliminary HEAD request if we're not in spider mode AND
-   * if -O was given or HTTP Content-Disposition support is disabled. */
-  if (!opt.spider
-      && (got_name || !opt.content_disposition))
+
+  /* Skip preliminary HEAD request if we're not in spider mode.  */
+  if (!opt.spider)
      send_head_first = false;
  
-  /* Send preliminary HEAD request if -N is given and we have an existing 
+  /* Send preliminary HEAD request if -N is given and we have an existing
     * destination file. */
-  if (opt.timestamping 
-      && !opt.content_disposition
-      && file_exists_p (url_file_name (u)))
+  file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
+  if (opt.timestamping && (file_exists_p (file_name)
+                           || opt.content_disposition))
      send_head_first = true;
-  
+  xfree (file_name);
+
    /* THE loop */
    do
      {
        /* Increment the pass counter.  */
        ++count;
        sleep_between_retrievals (count);
-      
+
        /* Get the current time string.  */
        tms = datetime_str (time (NULL));
-      
+
        if (opt.spider && !got_head)
          logprintf (LOG_VERBOSE, _("\
  Spider mode enabled. Check if remote file exists.\n"));
@@ -2438,20 +3004,20 @@ Spider mode enabled. Check if remote file exists.\n"));
        if (opt.verbose)
          {
            char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-          
-          if (count > 1) 
+
+          if (count > 1)
              {
                char tmp[256];
                sprintf (tmp, _("(try:%2d)"), count);
                logprintf (LOG_NOTQUIET, "--%s--  %s  %s\n",
                           tms, tmp, hurl);
              }
-          else 
+          else
              {
                logprintf (LOG_NOTQUIET, "--%s--  %s\n",
                           tms, hurl);
              }
-          
+
  #ifdef WINDOWS
            ws_changetitle (hurl);
  #endif
@@ -2461,13 +3027,15 @@ Spider mode enabled. Check if remote file exists.\n"));
        /* Default document type is empty.  However, if spider mode is
           on or time-stamping is employed, HEAD_ONLY commands is
           encoded within *dt.  */
-      if (send_head_first && !got_head) 
+      if (send_head_first && !got_head)
          *dt |= HEAD_ONLY;
        else
          *dt &= ~HEAD_ONLY;
  
        /* Decide whether or not to restart.  */
-      if (opt.always_rest
+      if (force_full_retrieve)
+        hstat.restval = hstat.len;
+      else if (opt.always_rest
            && got_name
            && stat (hstat.local_file, &st) == 0
            && S_ISREG (st.st_mode))
@@ -2494,11 +3062,11 @@ Spider mode enabled. Check if remote file exists.\n"));
          *dt &= ~SEND_NOCACHE;
  
        /* Try fetching the document, or at least its head.  */
-      err = gethttp (u, &hstat, dt, proxy);
+      err = gethttp (u, &hstat, dt, proxy, iri, count);
  
        /* Time?  */
        tms = datetime_str (time (NULL));
-      
+
        /* Get the new location (with or without the redirection).  */
        if (hstat.newloc)
          *newloc = xstrdup (hstat.newloc);
@@ -2518,17 +3086,37 @@ Spider mode enabled. Check if remote file exists.\n"));
            logputs (LOG_VERBOSE, "\n");
            logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
                       quote (hstat.local_file), strerror (errno));
-        case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: 
-        case SSLINITFAILED: case CONTNOTSUPPORTED:
+        case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
+        case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR:
            /* Fatal errors just return from the function.  */
            ret = err;
            goto exit;
+        case WARC_ERR:
+          /* A fatal WARC error. */
+          logputs (LOG_VERBOSE, "\n");
+          logprintf (LOG_NOTQUIET, _("Cannot write to WARC file..\n"));
+          ret = err;
+          goto exit;
+        case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR:
+          /* A fatal WARC error. */
+          logputs (LOG_VERBOSE, "\n");
+          logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n"));
+          ret = err;
+          goto exit;
          case CONSSLERR:
            /* Another fatal error.  */
            logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
            ret = err;
            goto exit;
+        case UNLINKERR:
+          /* Another fatal error.  */
+          logputs (LOG_VERBOSE, "\n");
+          logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"),
+                     quote (hstat.local_file), strerror (errno));
+          ret = err;
+          goto exit;
          case NEWLOCATION:
+        case NEWLOCATION_KEEP_POST:
            /* Return the new location to the caller.  */
            if (!*newloc)
              {
@@ -2537,9 +3125,9 @@ Spider mode enabled. Check if remote file exists.\n"));
                           hstat.statcode);
                ret = WRONGCODE;
              }
-          else 
+          else
              {
-              ret = NEWLOCATION;
+              ret = err;
              }
            goto exit;
          case RETRUNNEEDED:
@@ -2553,7 +3141,7 @@ Spider mode enabled. Check if remote file exists.\n"));
            /* All possibilities should have been exhausted.  */
            abort ();
          }
-      
+
        if (!(*dt & RETROKF))
          {
            char *hurl = NULL;
@@ -2572,11 +3160,13 @@ Spider mode enabled. Check if remote file exists.\n"));
                continue;
              }
            /* Maybe we should always keep track of broken links, not just in
-           * spider mode.  */
-          else if (opt.spider)
+           * spider mode.
+           * Don't log error if it was UTF-8 encoded because we will try
+           * once unencoded. */
+          else if (opt.spider && !iri->utf8_encode)
              {
                /* #### Again: ugly ugly ugly! */
-              if (!hurl) 
+              if (!hurl)
                  hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
                nonexisting_url (hurl);
                logprintf (LOG_NOTQUIET, _("\
@@ -2585,7 +3175,7 @@ Remote file does not exist -- broken link!!!\n"));
            else
              {
                logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
-                         tms, hstat.statcode, 
+                         tms, hstat.statcode,
                           quotearg_style (escape_quoting_style, hstat.error));
              }
            logputs (LOG_VERBOSE, "\n");
@@ -2614,7 +3204,7 @@ Last-modified header invalid -- time-stamp ignored.\n"));
                if (*dt & HEAD_ONLY)
                  time_came_from_head = true;
              }
-      
+
            if (send_head_first)
              {
                /* The time-stamping section.  */
@@ -2625,7 +3215,7 @@ Last-modified header invalid -- time-stamp ignored.\n"));
                                                 we're supposed to
                                                 download already exists.  */
                      {
-                      if (hstat.remote_time && 
+                      if (hstat.remote_time &&
                            tmr != (time_t) (-1))
                          {
                            /* Now time-stamping can be used validly.
@@ -2636,7 +3226,7 @@ Last-modified header invalid -- time-stamp ignored.\n"));
                               download procedure is resumed.  */
                            if (hstat.orig_file_tstamp >= tmr)
                              {
-                              if (hstat.contlen == -1 
+                              if (hstat.contlen == -1
                                    || hstat.orig_file_size == hstat.contlen)
                                  {
                                    logprintf (LOG_VERBOSE, _("\
@@ -2653,17 +3243,20 @@ The sizes do not match (local %s) -- retrieving.\n"),
                                  }
                              }
                            else
-                            logputs (LOG_VERBOSE,
-                                     _("Remote file is newer, retrieving.\n"));
+                            {
+                              force_full_retrieve = true;
+                              logputs (LOG_VERBOSE,
+                                       _("Remote file is newer, retrieving.\n"));
+                            }
  
                            logputs (LOG_VERBOSE, "\n");
                          }
                      }
-                  
+
                    /* free_hstat (&hstat); */
                    hstat.timestamp_checked = true;
                  }
-              
+
                if (opt.spider)
                  {
                    bool finished = true;
@@ -2675,7 +3268,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
  Remote file exists and could contain links to other resources -- retrieving.\n\n"));
                            finished = false;
                          }
-                      else 
+                      else
                          {
                            logprintf (LOG_VERBOSE, _("\
  Remote file exists but does not contain any link -- not retrieving.\n\n"));
@@ -2690,18 +3283,18 @@ Remote file exists but does not contain any link -- not retrieving.\n\n"));
  Remote file exists and could contain further links,\n\
  but recursion is disabled -- not retrieving.\n\n"));
                          }
-                      else 
+                      else
                          {
                            logprintf (LOG_VERBOSE, _("\
  Remote file exists.\n\n"));
                          }
                        ret = RETROK; /* RETRUNNEEDED is not for caller. */
                      }
-                  
+
                    if (finished)
                      {
-                      logprintf (LOG_NONVERBOSE, 
-                                 _("%s URL:%s %2d %s\n"), 
+                      logprintf (LOG_NONVERBOSE,
+                                 _("%s URL: %s %2d %s\n"),
                                   tms, u->url, hstat.statcode,
                                   hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
                        goto exit;
@@ -2714,21 +3307,14 @@ Remote file exists.\n\n"));
                continue;
              } /* send_head_first */
          } /* !got_head */
-          
-      if ((tmr != (time_t) (-1))
+
+      if (opt.useservertimestamps
+          && (tmr != (time_t) (-1))
            && ((hstat.len == hstat.contlen) ||
                ((hstat.res == 0) && (hstat.contlen == -1))))
          {
-          /* #### This code repeats in http.c and ftp.c.  Move it to a
-             function!  */
            const char *fl = NULL;
-          if (opt.output_document)
-            {
-              if (output_stream_regular)
-                fl = opt.output_document;
-            }
-          else
-            fl = hstat.local_file;
+          set_local_file (&fl, hstat.local_file);
            if (fl)
              {
                time_t newtmr = -1;
@@ -2737,7 +3323,7 @@ Remote file exists.\n\n"));
                    && hstat.remote_time && hstat.remote_time[0])
                  {
                    newtmr = http_atotm (hstat.remote_time);
-                  if (newtmr != -1)
+                  if (newtmr != (time_t)-1)
                      tmr = newtmr;
                  }
                touch (fl, tmr);
@@ -2752,9 +3338,14 @@ Remote file exists.\n\n"));
          {
            if (*dt & RETROKF)
              {
+              bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
                logprintf (LOG_VERBOSE,
-                         _("%s (%s) - %s saved [%s/%s]\n\n"),
-                         tms, tmrate, quote (hstat.local_file),
+                         write_to_stdout
+                         ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+                         : _("%s (%s) - %s saved [%s/%s]\n\n"),
+                         tms, tmrate,
+                         write_to_stdout ? "" : quote (hstat.local_file),
                           number_to_static_string (hstat.len),
                           number_to_static_string (hstat.contlen));
                logprintf (LOG_NONVERBOSE,
@@ -2764,14 +3355,14 @@ Remote file exists.\n\n"));
                           number_to_static_string (hstat.contlen),
                           hstat.local_file, count);
              }
-          ++opt.numurls;
-          total_downloaded_bytes += hstat.len;
+          ++numurls;
+          total_downloaded_bytes += hstat.rd_size;
  
            /* Remember that we downloaded the file for later ".orig" code. */
            if (*dt & ADDED_HTML_EXTENSION)
-            downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+            downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
            else
-            downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+            downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
  
            ret = RETROK;
            goto exit;
@@ -2779,28 +3370,33 @@ Remote file exists.\n\n"));
        else if (hstat.res == 0) /* No read error */
          {
            if (hstat.contlen == -1)  /* We don't know how much we were supposed
-                                       to get, so assume we succeeded. */ 
+                                       to get, so assume we succeeded. */
              {
                if (*dt & RETROKF)
                  {
+                  bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
                    logprintf (LOG_VERBOSE,
-                             _("%s (%s) - %s saved [%s]\n\n"),
-                             tms, tmrate, quote (hstat.local_file),
+                             write_to_stdout
+                             ? _("%s (%s) - written to stdout %s[%s]\n\n")
+                             : _("%s (%s) - %s saved [%s]\n\n"),
+                             tms, tmrate,
+                             write_to_stdout ? "" : quote (hstat.local_file),
                               number_to_static_string (hstat.len));
                    logprintf (LOG_NONVERBOSE,
                               "%s URL:%s [%s] -> \"%s\" [%d]\n",
                               tms, u->url, number_to_static_string (hstat.len),
                               hstat.local_file, count);
                  }
-              ++opt.numurls;
-              total_downloaded_bytes += hstat.len;
+              ++numurls;
+              total_downloaded_bytes += hstat.rd_size;
  
                /* Remember that we downloaded the file for later ".orig" code. */
                if (*dt & ADDED_HTML_EXTENSION)
-                downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+                downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
                else
-                downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
-              
+                downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+
                ret = RETROK;
                goto exit;
              }
@@ -2854,10 +3450,10 @@ Remote file exists.\n\n"));
    while (!opt.ntry || (count < opt.ntry));
  
  exit:
-  if (ret == RETROK) 
+  if (ret == RETROK && local_file)
      *local_file = xstrdup (hstat.local_file);
    free_hstat (&hstat);
-  
+
    return ret;
  }
  \f
@@ -2929,6 +3525,7 @@ http_atotm (const char *time_string)
                                     Netscape cookie specification.) */
    };
    const char *oldlocale;
+  char savedlocale[256];
    size_t i;
    time_t ret = (time_t) -1;
  
@@ -2936,6 +3533,16 @@ http_atotm (const char *time_string)
       non-English locales, which we work around by temporarily setting
       locale to C before invoking strptime.  */
    oldlocale = setlocale (LC_TIME, NULL);
+  if (oldlocale)
+    {
+      size_t l = strlen (oldlocale) + 1;
+      if (l >= sizeof savedlocale)
+        savedlocale[0] = '\0';
+      else
+        memcpy (savedlocale, oldlocale, l);
+    }
+  else savedlocale[0] = '\0';
+
    setlocale (LC_TIME, "C");
  
    for (i = 0; i < countof (time_formats); i++)
@@ -2955,7 +3562,8 @@ http_atotm (const char *time_string)
      }
  
    /* Restore the previous locale. */
-  setlocale (LC_TIME, oldlocale);
+  if (savedlocale[0])
+    setlocale (LC_TIME, savedlocale);
  
    return ret;
  }
@@ -3008,7 +3616,7 @@ dump_hash (char *buf, const unsigned char *hash)
  {
    int i;
  
-  for (i = 0; i < MD5_HASHLEN; i++, hash++)
+  for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++)
      {
        *buf++ = XNUM_TO_digit (*hash >> 4);
        *buf++ = XNUM_TO_digit (*hash & 0xf);
@@ -3061,37 +3669,37 @@ digest_authentication_encode (const char *au, const char *user,
  
    /* Calculate the digest value.  */
    {
-    ALLOCA_MD5_CONTEXT (ctx);
-    unsigned char hash[MD5_HASHLEN];
-    char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
-    char response_digest[MD5_HASHLEN * 2 + 1];
+    struct md5_ctx ctx;
+    unsigned char hash[MD5_DIGEST_SIZE];
+    char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1];
+    char response_digest[MD5_DIGEST_SIZE * 2 + 1];
  
      /* A1BUF = H(user ":" realm ":" password) */
-    gen_md5_init (ctx);
-    gen_md5_update ((unsigned char *)user, strlen (user), ctx);
-    gen_md5_update ((unsigned char *)":", 1, ctx);
-    gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
-    gen_md5_update ((unsigned char *)":", 1, ctx);
-    gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
-    gen_md5_finish (ctx, hash);
+    md5_init_ctx (&ctx);
+    md5_process_bytes ((unsigned char *)user, strlen (user), &ctx);
+    md5_process_bytes ((unsigned char *)":", 1, &ctx);
+    md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx);
+    md5_process_bytes ((unsigned char *)":", 1, &ctx);
+    md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx);
+    md5_finish_ctx (&ctx, hash);
      dump_hash (a1buf, hash);
  
      /* A2BUF = H(method ":" path) */
-    gen_md5_init (ctx);
-    gen_md5_update ((unsigned char *)method, strlen (method), ctx);
-    gen_md5_update ((unsigned char *)":", 1, ctx);
-    gen_md5_update ((unsigned char *)path, strlen (path), ctx);
-    gen_md5_finish (ctx, hash);
+    md5_init_ctx (&ctx);
+    md5_process_bytes ((unsigned char *)method, strlen (method), &ctx);
+    md5_process_bytes ((unsigned char *)":", 1, &ctx);
+    md5_process_bytes ((unsigned char *)path, strlen (path), &ctx);
+    md5_finish_ctx (&ctx, hash);
      dump_hash (a2buf, hash);
  
      /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
-    gen_md5_init (ctx);
-    gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
-    gen_md5_update ((unsigned char *)":", 1, ctx);
-    gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
-    gen_md5_update ((unsigned char *)":", 1, ctx);
-    gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
-    gen_md5_finish (ctx, hash);
+    md5_init_ctx (&ctx);
+    md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
+    md5_process_bytes ((unsigned char *)":", 1, &ctx);
+    md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
+    md5_process_bytes ((unsigned char *)":", 1, &ctx);
+    md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
+    md5_finish_ctx (&ctx, hash);
      dump_hash (response_digest, hash);
  
      res = xmalloc (strlen (user)
@@ -3099,7 +3707,7 @@ digest_authentication_encode (const char *au, const char *user,
                     + strlen (realm)
                     + strlen (nonce)
                     + strlen (path)
-                   + 2 * MD5_HASHLEN /*strlen (response_digest)*/
+                   + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
                     + (opaque ? strlen (opaque) : 0)
                     + 128);
      sprintf (res, "Digest \
@@ -3220,7 +3828,7 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt)
    if (len == 5)
      {
        strncpy (shortext, ext, len - 1);
-      shortext[len - 2] = '\0';
+      shortext[len - 1] = '\0';
      }
  
    if (last_period_in_local_filename == NULL
@@ -3255,32 +3863,28 @@ test_parse_content_disposition()
  {
    int i;
    struct {
-    char *hdrval;    
-    char *opt_dir_prefix;
+    char *hdrval;
      char *filename;
      bool result;
    } test_array[] = {
-    { "filename=\"file.ext\"", NULL, "file.ext", true },
-    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
-    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
-    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
-    { "attachment", NULL, NULL, false },
-    { "attachment", "somedir", NULL, false },
+    { "filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+    { "attachment", NULL, false },
+    { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
+    { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true },
    };
-  
-  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) 
+
+  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
      {
        char *filename;
        bool res;
  
-      opt.dir_prefix = test_array[i].opt_dir_prefix;
        res = parse_content_disposition (test_array[i].hdrval, &filename);
  
-      mu_assert ("test_parse_content_disposition: wrong result", 
+      mu_assert ("test_parse_content_disposition: wrong result",
                   res == test_array[i].result
-                 && (res == false 
+                 && (res == false
                       || 0 == strcmp (test_array[i].filename, filename)));
      }