Parse html from files when -r -nc is active.

[wget] / src / http.c
diff --git a/src/http.c b/src/http.c

index 99a059e57e2c70a6070f047cfa4cad02e9d78d06..129359cad47e183a01d89ae1aeb25b32b0a6ee3e 100644 (file)
--- a/src/http.c
+++ b/src/http.c
@@ -1,11 +1,12 @@
  /* HTTP support.
-   Copyright (C) 1996-2006 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
  GNU Wget is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.
  
  GNU Wget is distributed in the hope that it will be useful,
@@ -14,20 +15,20 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables.  You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL".  If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so.  If you do not wish to do
-so, delete this exception statement from your version.  */
+Additional permission under GNU GPL version 3 section 7
  
-#include <config.h>
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work.  */
+
+#include "wget.h"
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -40,7 +41,7 @@ so, delete this exception statement from your version.  */
  #include <time.h>
  #include <locale.h>
  
-#include "wget.h"
+#include "hash.h"
  #include "http.h"
  #include "utils.h"
  #include "url.h"
@@ -67,6 +68,14 @@ so, delete this exception statement from your version.  */
  
  extern char *version_string;
  
+/* Forward decls. */
+static char *create_authorization_line (const char *, const char *,
+                                        const char *, const char *,
+                                        const char *, bool *);
+static char *basic_authentication_encode (const char *, const char *);
+static bool known_authentication_scheme_p (const char *, const char *);
+static void load_cookies (void);
+
  #ifndef MIN
  # define MIN(x, y) ((x) > (y) ? (y) : (x))
  #endif
@@ -270,7 +279,7 @@ request_set_user_header (struct request *req, const char *header)
      return;
    BOUNDED_TO_ALLOCA (header, p, name);
    ++p;
-  while (ISSPACE (*p))
+  while (c_isspace (*p))
      ++p;
    request_set_header (req, xstrdup (name), (char *) p, rel_name);
  }
@@ -374,6 +383,58 @@ request_free (struct request *req)
    xfree (req);
  }
  
+static struct hash_table *basic_authed_hosts;
+
+/* Send Basic credentials if auth-without-challenge is set or this host
+ * has already issued a Basic challenge; return true if they were sent.
+ * A temporary measure until we can get proper authentication in place. */
+
+static bool
+maybe_send_basic_creds (const char *hostname, const char *user,
+                        const char *passwd, struct request *req)
+{
+  bool do_challenge = false;
+
+  if (opt.auth_without_challenge)
+    {
+      DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+      do_challenge = true;
+    }
+  else if (basic_authed_hosts
+      && hash_table_contains(basic_authed_hosts, hostname))
+    {
+      DEBUGP(("Found `%s' in basic_authed_hosts.\n", hostname));
+      do_challenge = true;
+    }
+  else
+    {
+      DEBUGP(("Host `%s' has not issued a general basic challenge.\n",
+              hostname));
+    }
+  if (do_challenge)
+    {
+      request_set_header (req, "Authorization",
+                          basic_authentication_encode (user, passwd),
+                          rel_value);
+    }
+  return do_challenge;
+}
+
+static void
+register_basic_auth_host (const char *hostname)
+{
+  if (!basic_authed_hosts)
+    {
+      basic_authed_hosts = make_nocase_string_hash_table (1);
+    }
+  if (!hash_table_contains(basic_authed_hosts, hostname))
+    {
+      hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
+      DEBUGP(("Inserted `%s' into basic_authed_hosts\n", hostname));
+    }
+}
+
+
  /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
     PROMISED_SIZE bytes are sent over the wire -- if the file is
     longer, read only that much; if the file is shorter, report an error.  */
@@ -600,9 +661,9 @@ resp_header_locate (const struct response *resp, const char *name, int start,
            && 0 == strncasecmp (b, name, name_len))
          {
            b += name_len + 1;
-          while (b < e && ISSPACE (*b))
+          while (b < e && c_isspace (*b))
              ++b;
-          while (b < e && ISSPACE (e[-1]))
+          while (b < e && c_isspace (e[-1]))
              --e;
            *begptr = b;
            *endptr = e;
@@ -701,17 +762,17 @@ resp_status (const struct response *resp, char **message)
    if (p < end && *p == '/')
      {
        ++p;
-      while (p < end && ISDIGIT (*p))
+      while (p < end && c_isdigit (*p))
          ++p;
        if (p < end && *p == '.')
          ++p; 
-      while (p < end && ISDIGIT (*p))
+      while (p < end && c_isdigit (*p))
          ++p;
      }
  
-  while (p < end && ISSPACE (*p))
+  while (p < end && c_isspace (*p))
      ++p;
-  if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
+  if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2]))
      return -1;
  
    status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
@@ -719,9 +780,9 @@ resp_status (const struct response *resp, char **message)
  
    if (message)
      {
-      while (p < end && ISSPACE (*p))
+      while (p < end && c_isspace (*p))
          ++p;
-      while (p < end && ISSPACE (end[-1]))
+      while (p < end && c_isspace (end[-1]))
          --end;
        *message = strdupdelim (p, end);
      }
@@ -738,6 +799,20 @@ resp_free (struct response *resp)
    xfree (resp);
  }
  
+/* Print a single line of response, the characters [b, e).  We tried
+   getting away with
+      logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
+   but that failed to escape the non-printable characters and, in fact,
+   caused crashes in UTF-8 locales.  */
+
+static void
+print_response_line(const char *prefix, const char *b, const char *e)
+{
+  char *copy;
+  BOUNDED_TO_ALLOCA(b, e, copy);
+  logprintf (LOG_ALWAYS, "%s%s\n", prefix, escnonprint(copy));
+}
+
  /* Print the server response, line by line, omitting the trailing CRLF
     from individual header lines, and prefixed with PREFIX.  */
  
@@ -756,9 +831,7 @@ print_server_response (const struct response *resp, const char *prefix)
          --e;
        if (b < e && e[-1] == '\r')
          --e;
-      /* This is safe even on printfs with broken handling of "%.<n>s"
-         because resp->headers ends with \0.  */
-      logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
+      print_response_line(prefix, b, e);
      }
  }
  
@@ -780,27 +853,30 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr,
           HTTP spec. */
        if (*hdr == ':')
          ++hdr;
-      while (ISSPACE (*hdr))
+      while (c_isspace (*hdr))
          ++hdr;
        if (!*hdr)
          return false;
      }
-  if (!ISDIGIT (*hdr))
+  if (!c_isdigit (*hdr))
      return false;
-  for (num = 0; ISDIGIT (*hdr); hdr++)
+  for (num = 0; c_isdigit (*hdr); hdr++)
      num = 10 * num + (*hdr - '0');
-  if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
+  if (*hdr != '-' || !c_isdigit (*(hdr + 1)))
      return false;
    *first_byte_ptr = num;
    ++hdr;
-  for (num = 0; ISDIGIT (*hdr); hdr++)
+  for (num = 0; c_isdigit (*hdr); hdr++)
      num = 10 * num + (*hdr - '0');
-  if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
+  if (*hdr != '/' || !c_isdigit (*(hdr + 1)))
      return false;
    *last_byte_ptr = num;
    ++hdr;
-  for (num = 0; ISDIGIT (*hdr); hdr++)
-    num = 10 * num + (*hdr - '0');
+  if (*hdr == '*')
+    num = -1;
+  else
+    for (num = 0; c_isdigit (*hdr); hdr++)
+      num = 10 * num + (*hdr - '0');
    *entity_length_ptr = num;
    return true;
  }
@@ -870,25 +946,25 @@ skip_short_body (int fd, wgint contlen)
  
  bool
  extract_param (const char **source, param_token *name, param_token *value,
-              char separator)
+               char separator)
  {
    const char *p = *source;
  
-  while (ISSPACE (*p)) ++p;
+  while (c_isspace (*p)) ++p;
    if (!*p)
      {
        *source = p;
-      return false;            /* no error; nothing more to extract */
+      return false;             /* no error; nothing more to extract */
      }
  
    /* Extract name. */
    name->b = p;
-  while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
+  while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p;
    name->e = p;
    if (name->b == name->e)
-    return false;              /* empty name: error */
-  while (ISSPACE (*p)) ++p;
-  if (*p == separator || !*p)          /* no value */
+    return false;               /* empty name: error */
+  while (c_isspace (*p)) ++p;
+  if (*p == separator || !*p)           /* no value */
      {
        xzero (*value);
        if (*p == separator) ++p;
@@ -896,12 +972,12 @@ extract_param (const char **source, param_token *name, param_token *value,
        return true;
      }
    if (*p != '=')
-    return false;              /* error */
+    return false;               /* error */
  
    /* *p is '=', extract value */
    ++p;
-  while (ISSPACE (*p)) ++p;
-  if (*p == '"')               /* quoted */
+  while (c_isspace (*p)) ++p;
+  if (*p == '"')                /* quoted */
      {
        value->b = ++p;
        while (*p && *p != '"') ++p;
@@ -909,20 +985,20 @@ extract_param (const char **source, param_token *name, param_token *value,
          return false;
        value->e = p++;
        /* Currently at closing quote; find the end of param. */
-      while (ISSPACE (*p)) ++p;
+      while (c_isspace (*p)) ++p;
        while (*p && *p != separator) ++p;
        if (*p == separator)
-       ++p;
+        ++p;
        else if (*p)
-       /* garbage after closed quote, e.g. foo="bar"baz */
-       return false;
+        /* garbage after closed quote, e.g. foo="bar"baz */
+        return false;
      }
-  else                         /* unquoted */
+  else                          /* unquoted */
      {
        value->b = p;
        while (*p && *p != separator) ++p;
        value->e = p;
-      while (value->e != value->b && ISSPACE (value->e[-1]))
+      while (value->e != value->b && c_isspace (value->e[-1]))
          --value->e;
        if (*p == separator) ++p;
      }
@@ -957,16 +1033,34 @@ parse_content_disposition (const char *hdr, char **filename)
    while (extract_param (&hdr, &name, &value, ';'))
      if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
        {
-       /* Make the file name begin at the last slash or backslash. */
+        /* Make the file name begin at the last slash or backslash. */
          const char *last_slash = memrchr (value.b, '/', value.e - value.b);
          const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
          if (last_slash && last_bs)
            value.b = 1 + MAX (last_slash, last_bs);
          else if (last_slash || last_bs)
            value.b = 1 + (last_slash ? last_slash : last_bs);
-       if (value.b == value.e)
-         continue;
-        *filename = strdupdelim (value.b, value.e);
+        if (value.b == value.e)
+          continue;
+        /* Start with the directory prefix, if specified. */
+        if (opt.dir_prefix)
+          {
+            int prefix_length = strlen (opt.dir_prefix);
+            bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
+            int total_length;
+
+            if (add_slash) 
+              ++prefix_length;
+            total_length = prefix_length + (value.e - value.b);            
+            *filename = xmalloc (total_length + 1);
+            strcpy (*filename, opt.dir_prefix);
+            if (add_slash) 
+              (*filename)[prefix_length - 1] = '/';
+            memcpy (*filename + prefix_length, value.b, (value.e - value.b));
+            (*filename)[total_length] = '\0';
+          }
+        else
+          *filename = strdupdelim (value.b, value.e);
          return true;
        }
    return false;
@@ -1205,6 +1299,10 @@ struct http_stat
    double dltime;                /* time it took to download the data */
    const char *referer;          /* value of the referer header. */
    char *local_file;             /* local file name. */
+  bool existence_checked;       /* true if we already checked for a file's
+                                   existence after having begun to download
+                                   (needed in gethttp for when connection is
+                                   interrupted/restarted). */
    bool timestamp_checked;       /* true if pre-download time-stamping checks 
                                   * have already been performed */
    char *orig_file_name;         /* name of file to compare for time-stamping
@@ -1230,16 +1328,9 @@ free_hstat (struct http_stat *hs)
    hs->error = NULL;
  }
  
-static char *create_authorization_line (const char *, const char *,
-                                        const char *, const char *,
-                                        const char *, bool *);
-static char *basic_authentication_encode (const char *, const char *);
-static bool known_authentication_scheme_p (const char *, const char *);
-static void load_cookies (void);
-
  #define BEGINS_WITH(line, string_constant)                               \
    (!strncasecmp (line, string_constant, sizeof (string_constant) - 1)    \
-   && (ISSPACE (line[sizeof (string_constant) - 1])                      \
+   && (c_isspace (line[sizeof (string_constant) - 1])                      \
         || !line[sizeof (string_constant) - 1]))
  
  #define SET_USER_AGENT(req) do {                                         \
@@ -1283,10 +1374,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
    int sock = -1;
    int flags;
  
-  /* Set to 1 when the authorization has failed permanently and should
+  /* Set to 1 when the authorization has already been sent and should
       not be tried again. */
    bool auth_finished = false;
  
+  /* Set to true once Basic credentials from the globally-set
+   * user/password have been sent; should prevent further Basic
+   * negotiations, but not other mechanisms. */
+  bool basic_auth_finished = false;
+
    /* Whether NTLM authentication is used for this request. */
    bool ntlm_seen = false;
  
@@ -1392,66 +1488,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
    user = user ? user : (opt.http_user ? opt.http_user : opt.user);
    passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
  
-  if (user && passwd)
+  if (user && passwd
+      && !u->user) /* We only do "site-wide" authentication with "global"
+                      user/password values; URL user/password info overrides. */
      {
-      /* We have the username and the password, but haven't tried
-         any authorization yet.  Let's see if the "Basic" method
-         works.  If not, we'll come back here and construct a
-         proper authorization method with the right challenges.
-
-         If we didn't employ this kind of logic, every URL that
-         requires authorization would have to be processed twice,
-         which is very suboptimal and generates a bunch of false
-         "unauthorized" errors in the server log.
-
-         #### But this logic also has a serious problem when used
-         with stronger authentications: we *first* transmit the
-         username and the password in clear text, and *then* attempt a
-         stronger authentication scheme.  That cannot be right!  We
-         are only fortunate that almost everyone still uses the
-         `Basic' scheme anyway.
-
-         There should be an option to prevent this from happening, for
-         those who use strong authentication schemes and value their
-         passwords.  */
-      request_set_header (req, "Authorization",
-                          basic_authentication_encode (user, passwd),
-                          rel_value);
-    }
-
-  proxyauth = NULL;
-  if (proxy)
-    {
-      char *proxy_user, *proxy_passwd;
-      /* For normal username and password, URL components override
-         command-line/wgetrc parameters.  With proxy
-         authentication, it's the reverse, because proxy URLs are
-         normally the "permanent" ones, so command-line args
-         should take precedence.  */
-      if (opt.proxy_user && opt.proxy_passwd)
-        {
-          proxy_user = opt.proxy_user;
-          proxy_passwd = opt.proxy_passwd;
-        }
-      else
-        {
-          proxy_user = proxy->user;
-          proxy_passwd = proxy->passwd;
-        }
-      /* #### This does not appear right.  Can't the proxy request,
-         say, `Digest' authentication?  */
-      if (proxy_user && proxy_passwd)
-        proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
-
-      /* If we're using a proxy, we will be connecting to the proxy
-         server.  */
-      conn = proxy;
-
-      /* Proxy authorization over SSL is handled below. */
-#ifdef HAVE_SSL
-      if (u->scheme != SCHEME_HTTPS)
-#endif
-        request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+      /* If this is a host for which we've already received a Basic
+       * challenge, we'll go ahead and send Basic authentication creds. */
+      basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
      }
  
    /* Generate the Host header, HOST:PORT.  Take into account that:
@@ -1524,6 +1567,41 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
       without authorization header fails.  (Expected to happen at least
       for the Digest authorization scheme.)  */
  
+  proxyauth = NULL;
+  if (proxy)
+    {
+      char *proxy_user, *proxy_passwd;
+      /* For normal username and password, URL components override
+         command-line/wgetrc parameters.  With proxy
+         authentication, it's the reverse, because proxy URLs are
+         normally the "permanent" ones, so command-line args
+         should take precedence.  */
+      if (opt.proxy_user && opt.proxy_passwd)
+        {
+          proxy_user = opt.proxy_user;
+          proxy_passwd = opt.proxy_passwd;
+        }
+      else
+        {
+          proxy_user = proxy->user;
+          proxy_passwd = proxy->passwd;
+        }
+      /* #### This does not appear right.  Can't the proxy request,
+         say, `Digest' authentication?  */
+      if (proxy_user && proxy_passwd)
+        proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
+
+      /* If we're using a proxy, we will be connecting to the proxy
+         server.  */
+      conn = proxy;
+
+      /* Proxy authorization over SSL is handled below. */
+#ifdef HAVE_SSL
+      if (u->scheme != SCHEME_HTTPS)
+#endif
+        request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+    }
+
    keep_alive = false;
  
    /* Establish the connection.  */
@@ -1559,19 +1637,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
                 only hurts us.  */
              request_remove_header (req, "Authorization");
          }
-    }
-
-  if (sock < 0)
-    {
-      /* In its current implementation, persistent_available_p will
-         look up conn->host in some cases.  If that lookup failed, we
-         don't need to bother with connect_to_host.  */
-      if (host_lookup_failed)
+      else if (host_lookup_failed)
          {
            request_free (req);
+          logprintf(LOG_NOTQUIET,
+                    _("%s: unable to resolve host address `%s'\n"),
+                    exec_name, relevant->host);
            return HOSTERR;
          }
+    }
  
+  if (sock < 0)
+    {
        sock = connect_to_host (conn->host, conn->port);
        if (sock == E_HOST)
          {
@@ -1742,7 +1819,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
      }
    
    /* TODO: perform this check only once. */
-  if (file_exists_p (hs->local_file))
+  if (!hs->existence_checked && file_exists_p (hs->local_file))
      {
        if (opt.noclobber)
          {
@@ -1758,7 +1835,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
            if (has_html_suffix_p (hs->local_file))
              *dt |= TEXTHTML;
  
-          return RETROK;
+          return RETRUNNEEDED;
          }
        else if (!ALLOW_CLOBBER)
          {
@@ -1768,6 +1845,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
            hs->local_file = unique;
          }
      }
+  hs->existence_checked = true;
  
    /* Support timestamping */
    /* TODO: move this code out of gethttp. */
@@ -1837,12 +1915,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
        errno = 0;
        parsed = str_to_wgint (hdrval, NULL, 10);
        if (parsed == WGINT_MAX && errno == ERANGE)
-        /* Out of range.
-           #### If Content-Length is out of range, it most likely
-           means that the file is larger than 2G and that we're
-           compiled without LFS.  In that case we should probably
-           refuse to even attempt to download the file.  */
-        contlen = -1;
+        {
+          /* Out of range.
+             #### If Content-Length is out of range, it most likely
+             means that the file is larger than 2G and that we're
+             compiled without LFS.  In that case we should probably
+             refuse to even attempt to download the file.  */
+          contlen = -1;
+        }
+      else if (parsed < 0)
+        {
+          /* Negative Content-Length; nonsensical, so we can't
+             assume any information about the content to receive. */
+          contlen = -1;
+        }
        else
          contlen = parsed;
      }
@@ -1890,16 +1976,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
                }
  
            if (!www_authenticate)
-            /* If the authentication header is missing or
-               unrecognized, there's no sense in retrying.  */
-            logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
-          else if (BEGINS_WITH (www_authenticate, "Basic"))
-            /* If the authentication scheme is "Basic", which we send
-               by default, there's no sense in retrying either.  (This
-               should be changed when we stop sending "Basic" data by
-               default.)  */
-            ;
-          else
+            {
+              /* If the authentication header is missing or
+                 unrecognized, there's no sense in retrying.  */
+              logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+            }
+          else if (!basic_auth_finished
+                   || !BEGINS_WITH (www_authenticate, "Basic"))
              {
                char *pth;
                pth = url_full_path (u);
@@ -1912,9 +1995,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
                                    rel_value);
                if (BEGINS_WITH (www_authenticate, "NTLM"))
                  ntlm_seen = true;
+              else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+                {
+                  /* Need to register this host as using basic auth,
+                   * so we automatically send creds next time. */
+                  register_basic_auth_host (u->host);
+                }
                xfree (pth);
                goto retry_with_auth;
              }
+          else
+            {
+              /* We already did Basic auth, and it failed. Gotta
+               * give up. */
+            }
          }
        logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
        request_free (req);
@@ -1943,7 +2037,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
        char *tmp = strchr (type, ';');
        if (tmp)
          {
-          while (tmp > type && ISSPACE (tmp[-1]))
+          while (tmp > type && c_isspace (tmp[-1]))
              --tmp;
            *tmp = '\0';
          }
@@ -1974,7 +2068,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
        wgint first_byte_pos, last_byte_pos, entity_length;
        if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
                                 &entity_length))
-        contrange = first_byte_pos;
+        {
+          contrange = first_byte_pos;
+          contlen = last_byte_pos - first_byte_pos + 1;
+        }
      }
    resp_free (resp);
  
@@ -2074,7 +2171,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
        CLOSE_INVALIDATE (sock);
        return RANGEERR;
      }
-  hs->contlen = contlen + contrange;
+  if (contlen == -1)
+    hs->contlen = -1;
+  else
+    hs->contlen = contlen + contrange;
  
    if (opt.verbose)
      {
@@ -2227,14 +2327,15 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
  {
    int count;
    bool got_head = false;         /* used for time-stamping and filename detection */
+  bool time_came_from_head = false;
    bool got_name = false;
    char *tms;
    const char *tmrate;
    uerr_t err, ret = TRYLIMEXC;
    time_t tmr = -1;               /* remote time-stamp */
-  wgint local_size = 0;          /* the size of the local file */
    struct http_stat hstat;        /* HTTP status */
    struct_stat st;  
+  bool send_head_first = true;
  
    /* Assert that no value for *LOCAL_FILE was passed. */
    assert (local_file == NULL || *local_file == NULL);
@@ -2265,6 +2366,32 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
        hstat.local_file = xstrdup (opt.output_document);
        got_name = true;
      }
+  else if (!opt.content_disposition)
+    {
+      hstat.local_file = url_file_name (u);
+      got_name = true;
+    }
+
+  /* TODO: Ick! This code is now in both gethttp and http_loop, and is
+   * screaming for some refactoring. */
+  if (got_name && file_exists_p (hstat.local_file) && opt.noclobber)
+    {
+      /* If opt.noclobber is turned on and file already exists, do not
+         retrieve the file */
+      logprintf (LOG_VERBOSE, _("\
+File `%s' already there; not retrieving.\n\n"), 
+                 hstat.local_file);
+      /* If the file is there, we suppose it's retrieved OK.  */
+      *dt |= RETROKF;
+
+      /* #### Bogusness alert.  */
+      /* If its suffix is "html" or "htm" or similar, assume text/html.  */
+      if (has_html_suffix_p (hstat.local_file))
+        *dt |= TEXTHTML;
+
+      ret = RETROK;
+      goto exit;
+    }
  
    /* Reset the counter. */
    count = 0;
@@ -2272,6 +2399,19 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
    /* Reset the document type. */
    *dt = 0;
    
+  /* Skip preliminary HEAD request if we're not in spider mode AND
+   * if -O was given or HTTP Content-Disposition support is disabled. */
+  if (!opt.spider
+      && (got_name || !opt.content_disposition))
+    send_head_first = false;
+
+  /* Send preliminary HEAD request if -N is given and we have an existing 
+   * destination file. */
+  if (opt.timestamping 
+      && !opt.content_disposition
+      && file_exists_p (url_file_name (u)))
+    send_head_first = true;
+  
    /* THE loop */
    do
      {
@@ -2280,7 +2420,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
        sleep_between_retrievals (count);
        
        /* Get the current time string.  */
-      tms = time_str (time (NULL));
+      tms = datetime_str (time (NULL));
        
        if (opt.spider && !got_head)
          logprintf (LOG_VERBOSE, _("\
@@ -2289,7 +2429,7 @@ Spider mode enabled. Check if remote file exists.\n"));
        /* Print fetch message, if opt.verbose.  */
        if (opt.verbose)
          {
-          char *hurl = url_string (u, true);
+          char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
            
            if (count > 1) 
              {
@@ -2313,8 +2453,7 @@ Spider mode enabled. Check if remote file exists.\n"));
        /* Default document type is empty.  However, if spider mode is
           on or time-stamping is employed, HEAD_ONLY commands is
           encoded within *dt.  */
-      if (((opt.spider || opt.timestamping) && !got_head)
-          || (opt.always_rest && !got_name))
+      if (send_head_first && !got_head) 
          *dt |= HEAD_ONLY;
        else
          *dt &= ~HEAD_ONLY;
@@ -2350,12 +2489,12 @@ Spider mode enabled. Check if remote file exists.\n"));
        err = gethttp (u, &hstat, dt, proxy);
  
        /* Time?  */
-      tms = time_str (time (NULL));
+      tms = datetime_str (time (NULL));
        
        /* Get the new location (with or without the redirection).  */
        if (hstat.newloc)
          *newloc = xstrdup (hstat.newloc);
-      
+
        switch (err)
          {
          case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
@@ -2406,23 +2545,31 @@ Spider mode enabled. Check if remote file exists.\n"));
            /* All possibilities should have been exhausted.  */
            abort ();
          }
-     
+      
        if (!(*dt & RETROKF))
          {
            char *hurl = NULL;
            if (!opt.verbose)
              {
                /* #### Ugly ugly ugly! */
-              hurl = url_string (u, true);
+              hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
                logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
              }
+
+          /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
+          if (*dt & HEAD_ONLY
+              && (hstat.statcode == 500 || hstat.statcode == 501))
+            {
+              got_head = true;
+              continue;
+            }
            /* Maybe we should always keep track of broken links, not just in
             * spider mode.  */
-          if (opt.spider)
+          else if (opt.spider)
              {
                /* #### Again: ugly ugly ugly! */
                if (!hurl) 
-                hurl = url_string (u, true);
+                hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
                nonexisting_url (hurl);
                logprintf (LOG_NOTQUIET, _("\
  Remote file does not exist -- broken link!!!\n"));
@@ -2441,7 +2588,7 @@ Remote file does not exist -- broken link!!!\n"));
        /* Did we get the time-stamp? */
        if (!got_head)
          {
-          bool restart_loop = false;
+          got_head = true;    /* no more time-stamping */
  
            if (opt.timestamping && !hstat.remote_time)
              {
@@ -2455,94 +2602,100 @@ Last-modified header missing -- time-stamps turned off.\n"));
                if (tmr == (time_t) (-1))
                  logputs (LOG_VERBOSE, _("\
  Last-modified header invalid -- time-stamp ignored.\n"));
+              if (*dt & HEAD_ONLY)
+                time_came_from_head = true;
              }
        
-          /* The time-stamping section.  */
-          if (opt.timestamping)
+          if (send_head_first)
              {
-              if (hstat.orig_file_name) /* Perform the following checks only 
-                                           if the file we're supposed to 
-                                           download already exists. */
+              /* The time-stamping section.  */
+              if (opt.timestamping)
                  {
-                  if (hstat.remote_time && 
-                      tmr != (time_t) (-1))
+                  if (hstat.orig_file_name) /* Perform the following
+                                               checks only if the file
+                                               we're supposed to
+                                               download already exists.  */
                      {
-                      /* Now time-stamping can be used validly.  Time-stamping
-                         means that if the sizes of the local and remote file
-                         match, and local file is newer than the remote file,
-                         it will not be retrieved.  Otherwise, the normal
-                         download procedure is resumed.  */
-                      if (hstat.orig_file_tstamp >= tmr)
+                      if (hstat.remote_time && 
+                          tmr != (time_t) (-1))
                          {
-                          if (hstat.contlen == -1 
-                              || hstat.orig_file_size == hstat.contlen)
+                          /* Now time-stamping can be used validly.
+                             Time-stamping means that if the sizes of
+                             the local and remote file match, and local
+                             file is newer than the remote file, it will
+                             not be retrieved.  Otherwise, the normal
+                             download procedure is resumed.  */
+                          if (hstat.orig_file_tstamp >= tmr)
                              {
-                              logprintf (LOG_VERBOSE, _("\
+                              if (hstat.contlen == -1 
+                                  || hstat.orig_file_size == hstat.contlen)
+                                {
+                                  logprintf (LOG_VERBOSE, _("\
  Server file no newer than local file `%s' -- not retrieving.\n\n"),
-                                         hstat.orig_file_name);
-                              ret = RETROK;
-                              goto exit;
-                            }
-                          else
-                            {
-                              logprintf (LOG_VERBOSE, _("\
+                                             hstat.orig_file_name);
+                                  ret = RETROK;
+                                  goto exit;
+                                }
+                              else
+                                {
+                                  logprintf (LOG_VERBOSE, _("\
  The sizes do not match (local %s) -- retrieving.\n"),
-                                         number_to_static_string (local_size));
+                                             number_to_static_string (hstat.orig_file_size));
+                                }
                              }
-                        }
-                      else
-                        logputs (LOG_VERBOSE,
-                                 _("Remote file is newer, retrieving.\n"));
+                          else
+                            logputs (LOG_VERBOSE,
+                                     _("Remote file is newer, retrieving.\n"));
  
-                      logputs (LOG_VERBOSE, "\n");
+                          logputs (LOG_VERBOSE, "\n");
+                        }
                      }
+                  
+                  /* free_hstat (&hstat); */
+                  hstat.timestamp_checked = true;
                  }
                
-              /* free_hstat (&hstat); */
-              hstat.timestamp_checked = true;
-              restart_loop = true;
-            }
-          
-          if (opt.always_rest)
-            {
-              got_name = true;
-              restart_loop = true;
-            }
-          
-          if (opt.spider)
-            {
-              if (opt.recursive)
+              if (opt.spider)
                  {
-                  if (*dt & TEXTHTML)
+                  if (opt.recursive)
                      {
-                      logputs (LOG_VERBOSE, _("\
+                      if (*dt & TEXTHTML)
+                        {
+                          logputs (LOG_VERBOSE, _("\
  Remote file exists and could contain links to other resources -- retrieving.\n\n"));
-                      restart_loop = true;
+                        }
+                      else 
+                        {
+                          logprintf (LOG_VERBOSE, _("\
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+                          ret = RETROK; /* RETRUNNEEDED is not for caller. */
+                          goto exit;
+                        }
                      }
-                  else 
+                  else
                      {
-                      logprintf (LOG_VERBOSE, _("\
-Remote file exists but does not contain any link -- not retrieving.\n\n"));
-                      ret = RETRUNNEEDED;
+                      if (*dt & TEXTHTML)
+                        {
+                          logprintf (LOG_VERBOSE, _("\
+Remote file exists and could contain further links,\n\
+but recursion is disabled -- not retrieving.\n\n"));
+                        }
+                      else 
+                        {
+                          logprintf (LOG_VERBOSE, _("\
+Remote file exists.\n\n"));
+                        }
+                      ret = RETROK; /* RETRUNNEEDED is not for caller. */
                        goto exit;
                      }
                  }
-              else
-                {
-                  logprintf (LOG_VERBOSE, _("\
-Remote file exists but recursion is disabled -- not retrieving.\n\n"));
-                  ret = RETRUNNEEDED;
-                  goto exit;
-                }
-            }
  
-          got_head = true;    /* no more time-stamping */
-          *dt &= ~HEAD_ONLY;
-          count = 0;          /* the retrieve count for HEAD is reset */
-
-          if (restart_loop) 
-            continue;
-        }
+              got_name = true;
+              *dt &= ~HEAD_ONLY;
+              count = 0;          /* the retrieve count for HEAD is reset */
+              continue;
+            } /* send_head_first */
+        } /* !got_head */
            
        if ((tmr != (time_t) (-1))
            && ((hstat.len == hstat.contlen) ||
@@ -2559,7 +2712,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n"));
            else
              fl = hstat.local_file;
            if (fl)
-            touch (fl, tmr);
+            {
+              time_t newtmr = -1;
+              /* Reparse time header, in case it's changed. */
+              if (time_came_from_head
+                  && hstat.remote_time && hstat.remote_time[0])
+                {
+                  newtmr = http_atotm (hstat.remote_time);
+                  if (newtmr != -1)
+                    tmr = newtmr;
+                }
+              touch (fl, tmr);
+            }
          }
        /* End of time-stamping section. */
  
@@ -2684,11 +2848,11 @@ check_end (const char *p)
  {
    if (!p)
      return false;
-  while (ISSPACE (*p))
+  while (c_isspace (*p))
      ++p;
    if (!*p
        || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
-      || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
+      || ((p[0] == '+' || p[0] == '-') && c_isdigit (p[1])))
      return true;
    else
      return false;
@@ -2804,7 +2968,7 @@ basic_authentication_encode (const char *user, const char *passwd)
  }
  
  #define SKIP_WS(x) do {                         \
-  while (ISSPACE (*(x)))                        \
+  while (c_isspace (*(x)))                        \
      ++(x);                                      \
  } while (0)
  
@@ -2852,12 +3016,12 @@ digest_authentication_encode (const char *au, const char *user,
      {
        int i;
        for (i = 0; i < countof (options); i++)
-       if (name.e - name.b == strlen (options[i].name)
-           && 0 == strncmp (name.b, options[i].name, name.e - name.b))
-         {
-           *options[i].variable = strdupdelim (value.b, value.e);
-           break;
-         }
+        if (name.e - name.b == strlen (options[i].name)
+            && 0 == strncmp (name.b, options[i].name, name.e - name.b))
+          {
+            *options[i].variable = strdupdelim (value.b, value.e);
+            break;
+          }
      }
    if (!realm || !nonce || !user || !passwd || !path || !method)
      {
@@ -2936,7 +3100,7 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
    ((e) - (b) >= STRSIZE (literal)                       \
     && 0 == strncasecmp (b, literal, STRSIZE (literal))  \
     && ((e) - (b) == STRSIZE (literal)                   \
-       || ISSPACE (b[STRSIZE (literal)])))
+       || c_isspace (b[STRSIZE (literal)])))
  
  static bool
  known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
@@ -2965,7 +3129,7 @@ create_authorization_line (const char *au, const char *user,
  {
    /* We are called only with known schemes, so we can dispatch on the
       first letter. */
-  switch (TOUPPER (*au))
+  switch (c_toupper (*au))
      {
      case 'B':                   /* Basic */
        *finished = true;
@@ -3027,19 +3191,27 @@ test_parse_content_disposition()
    int i;
    struct {
      char *hdrval;    
+    char *opt_dir_prefix;
      char *filename;
      bool result;
    } test_array[] = {
-    { "filename=\"file.ext\"", "file.ext", true },
-    { "attachment; filename=\"file.ext\"", "file.ext", true },
-    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
-    { "attachment", NULL, false },    
+    { "filename=\"file.ext\"", NULL, "file.ext", true },
+    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
+    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
+    { "attachment", NULL, NULL, false },
+    { "attachment", "somedir", NULL, false },
    };
    
    for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) 
      {
        char *filename;
-      bool res = parse_content_disposition (test_array[i].hdrval, &filename);
+      bool res;
+
+      opt.dir_prefix = test_array[i].opt_dir_prefix;
+      res = parse_content_disposition (test_array[i].hdrval, &filename);
  
        mu_assert ("test_parse_content_disposition: wrong result", 
                   res == test_array[i].result
@@ -3053,6 +3225,6 @@ test_parse_content_disposition()
  #endif /* TESTING */
  
  /*
- * vim: et ts=2 sw=2
+ * vim: et sts=2 sw=2 cino+={s
   */