sjero.net Git - wget/blob - src/http.c

   1 /* HTTP support.
   2    Copyright (C) 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9  (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif
  38 #include <assert.h>
  39 #include <errno.h>
  40 #include <time.h>
  41
  42 #include "wget.h"
  43 #include "utils.h"
  44 #include "url.h"
  45 #include "host.h"
  46 #include "retr.h"
  47 #include "connect.h"
  48 #include "netrc.h"
  49 #ifdef HAVE_SSL
  50 # include "ssl.h"
  51 #endif
  52 #ifdef ENABLE_NTLM
  53 # include "http-ntlm.h"
  54 #endif
  55 #include "cookies.h"
  56 #ifdef ENABLE_DIGEST
  57 # include "gen-md5.h"
  58 #endif
  59 #include "convert.h"
  60
  61 extern char *version_string;
  62 extern LARGE_INT total_downloaded_bytes;
  63
  64 extern FILE *output_stream;
  65 extern int output_stream_regular;
  66
/* Smaller of X and Y.  This is a plain macro, so the argument that is
   selected is evaluated twice; do not pass expressions with side
   effects.  Only defined here if nothing has defined MIN already.  */
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
  70
  71 \f
  72 static int cookies_loaded_p;
  73 static struct cookie_jar *wget_cookie_jar;
  74
  75 #define TEXTHTML_S "text/html"
  76 #define TEXTXHTML_S "application/xhtml+xml"
  77
  78 /* Some status code validation macros: */
  79 #define H_20X(x)        (((x) >= 200) && ((x) < 300))
  80 #define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
  81 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY           \
  82                          || (x) == HTTP_STATUS_MOVED_TEMPORARILY        \
  83                          || (x) == HTTP_STATUS_SEE_OTHER                \
  84                          || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
  85
  86 /* HTTP/1.0 status codes from RFC1945, provided for reference.  */
  87 /* Successful 2xx.  */
  88 #define HTTP_STATUS_OK                  200
  89 #define HTTP_STATUS_CREATED             201
  90 #define HTTP_STATUS_ACCEPTED            202
  91 #define HTTP_STATUS_NO_CONTENT          204
  92 #define HTTP_STATUS_PARTIAL_CONTENTS    206
  93
  94 /* Redirection 3xx.  */
  95 #define HTTP_STATUS_MULTIPLE_CHOICES    300
  96 #define HTTP_STATUS_MOVED_PERMANENTLY   301
  97 #define HTTP_STATUS_MOVED_TEMPORARILY   302
  98 #define HTTP_STATUS_SEE_OTHER           303 /* from HTTP/1.1 */
  99 #define HTTP_STATUS_NOT_MODIFIED        304
 100 #define HTTP_STATUS_TEMPORARY_REDIRECT  307 /* from HTTP/1.1 */
 101
 102 /* Client error 4xx.  */
 103 #define HTTP_STATUS_BAD_REQUEST         400
 104 #define HTTP_STATUS_UNAUTHORIZED        401
 105 #define HTTP_STATUS_FORBIDDEN           403
 106 #define HTTP_STATUS_NOT_FOUND           404
 107 #define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
 108
 109 /* Server errors 5xx.  */
 110 #define HTTP_STATUS_INTERNAL            500
 111 #define HTTP_STATUS_NOT_IMPLEMENTED     501
 112 #define HTTP_STATUS_BAD_GATEWAY         502
 113 #define HTTP_STATUS_UNAVAILABLE         503
 114 \f
/* Release policy of a request header: tells release_header which of
   the header's two strings (name, value, both or neither) it should
   free.  */
enum rp {
  rel_none, rel_name, rel_value, rel_both
};
 118
/* An HTTP request under construction: the method and argument of the
   request line, plus a growable array of headers.  */
struct request {
  const char *method;           /* not freed by request_free */
  char *arg;                    /* freed by request_free */

  struct request_header {
    char *name, *value;
    enum rp release_policy;     /* which of NAME and VALUE get freed */
  } *headers;
  int hcount, hcapacity;        /* headers in use, slots allocated */
};
 129
 130 /* Create a new, empty request.  At least request_set_method must be
 131    called before the request can be used.  */
 132
 133 static struct request *
 134 request_new (void)
 135 {
 136   struct request *req = xnew0 (struct request);
 137   req->hcapacity = 8;
 138   req->headers = xnew_array (struct request_header, req->hcapacity);
 139   return req;
 140 }
 141
 142 /* Set the request's method and its arguments.  METH should be a
 143    literal string (or it should outlive the request) because it will
 144    not be freed.  ARG will be freed by request_free.  */
 145
 146 static void
 147 request_set_method (struct request *req, const char *meth, char *arg)
 148 {
 149   req->method = meth;
 150   req->arg = arg;
 151 }
 152
 153 /* Return the method string passed with the last call to
 154    request_set_method.  */
 155
 156 static const char *
 157 request_method (const struct request *req)
 158 {
 159   return req->method;
 160 }
 161
 162 /* Free one header according to the release policy specified with
 163    request_set_header.  */
 164
 165 static void
 166 release_header (struct request_header *hdr)
 167 {
 168   switch (hdr->release_policy)
 169     {
 170     case rel_none:
 171       break;
 172     case rel_name:
 173       xfree (hdr->name);
 174       break;
 175     case rel_value:
 176       xfree (hdr->value);
 177       break;
 178     case rel_both:
 179       xfree (hdr->name);
 180       xfree (hdr->value);
 181       break;
 182     }
 183 }
 184
 185 /* Set the request named NAME to VALUE.  Specifically, this means that
 186    a "NAME: VALUE\r\n" header line will be used in the request.  If a
 187    header with the same name previously existed in the request, its
 188    value will be replaced by this one.  A NULL value means do nothing.
 189
 190    RELEASE_POLICY determines whether NAME and VALUE should be released
 191    (freed) with request_free.  Allowed values are:
 192
 193     - rel_none     - don't free NAME or VALUE
 194     - rel_name     - free NAME when done
 195     - rel_value    - free VALUE when done
 196     - rel_both     - free both NAME and VALUE when done
 197
 198    Setting release policy is useful when arguments come from different
 199    sources.  For example:
 200
 201      // Don't free literal strings!
 202      request_set_header (req, "Pragma", "no-cache", rel_none);
 203
 204      // Don't free a global variable, we'll need it later.
 205      request_set_header (req, "Referer", opt.referer, rel_none);
 206
 207      // Value freshly allocated, free it when done.
 208      request_set_header (req, "Range",
 209                          aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
 210                          rel_value);
 211    */
 212
 213 static void
 214 request_set_header (struct request *req, char *name, char *value,
 215                     enum rp release_policy)
 216 {
 217   struct request_header *hdr;
 218   int i;
 219
 220   if (!value)
 221     {
 222       /* A NULL value is a no-op; if freeing the name is requested,
 223          free it now to avoid leaks.  */
 224       if (release_policy == rel_name || release_policy == rel_both)
 225         xfree (name);
 226       return;
 227     }
 228
 229   for (i = 0; i < req->hcount; i++)
 230     {
 231       hdr = &req->headers[i];
 232       if (0 == strcasecmp (name, hdr->name))
 233         {
 234           /* Replace existing header. */
 235           release_header (hdr);
 236           hdr->name = name;
 237           hdr->value = value;
 238           hdr->release_policy = release_policy;
 239           return;
 240         }
 241     }
 242
 243   /* Install new header. */
 244
 245   if (req->hcount >= req->hcapacity)
 246     {
 247       req->hcapacity <<= 1;
 248       req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
 249     }
 250   hdr = &req->headers[req->hcount++];
 251   hdr->name = name;
 252   hdr->value = value;
 253   hdr->release_policy = release_policy;
 254 }
 255
 256 /* Like request_set_header, but sets the whole header line, as
 257    provided by the user using the `--header' option.  For example,
 258    request_set_user_header (req, "Foo: bar") works just like
 259    request_set_header (req, "Foo", "bar").  */
 260
 261 static void
 262 request_set_user_header (struct request *req, const char *header)
 263 {
 264   char *name;
 265   const char *p = strchr (header, ':');
 266   if (!p)
 267     return;
 268   BOUNDED_TO_ALLOCA (header, p, name);
 269   ++p;
 270   while (ISSPACE (*p))
 271     ++p;
 272   request_set_header (req, xstrdup (name), (char *) p, rel_name);
 273 }
 274
 275 /* Remove the header with specified name from REQ.  Returns 1 if the
 276    header was actually removed, 0 otherwise.  */
 277
 278 static int
 279 request_remove_header (struct request *req, char *name)
 280 {
 281   int i;
 282   for (i = 0; i < req->hcount; i++)
 283     {
 284       struct request_header *hdr = &req->headers[i];
 285       if (0 == strcasecmp (name, hdr->name))
 286         {
 287           release_header (hdr);
 288           /* Move the remaining headers by one. */
 289           if (i < req->hcount - 1)
 290             memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
 291           --req->hcount;
 292           return 1;
 293         }
 294     }
 295   return 0;
 296 }
 297
 298 #define APPEND(p, str) do {                     \
 299   int A_len = strlen (str);                     \
 300   memcpy (p, str, A_len);                       \
 301   p += A_len;                                   \
 302 } while (0)
 303
 304 /* Construct the request and write it to FD using fd_write.  */
 305
 306 static int
 307 request_send (const struct request *req, int fd)
 308 {
 309   char *request_string, *p;
 310   int i, size, write_error;
 311
 312   /* Count the request size. */
 313   size = 0;
 314
 315   /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
 316   size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
 317
 318   for (i = 0; i < req->hcount; i++)
 319     {
 320       struct request_header *hdr = &req->headers[i];
 321       /* NAME ": " VALUE "\r\n" */
 322       size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
 323     }
 324
 325   /* "\r\n\0" */
 326   size += 3;
 327
 328   p = request_string = alloca_array (char, size);
 329
 330   /* Generate the request. */
 331
 332   APPEND (p, req->method); *p++ = ' ';
 333   APPEND (p, req->arg);    *p++ = ' ';
 334   memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
 335
 336   for (i = 0; i < req->hcount; i++)
 337     {
 338       struct request_header *hdr = &req->headers[i];
 339       APPEND (p, hdr->name);
 340       *p++ = ':', *p++ = ' ';
 341       APPEND (p, hdr->value);
 342       *p++ = '\r', *p++ = '\n';
 343     }
 344
 345   *p++ = '\r', *p++ = '\n', *p++ = '\0';
 346   assert (p - request_string == size);
 347
 348 #undef APPEND
 349
 350   DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
 351
 352   /* Send the request to the server. */
 353
 354   write_error = fd_write (fd, request_string, size - 1, -1);
 355   if (write_error < 0)
 356     logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
 357                strerror (errno));
 358   return write_error;
 359 }
 360
 361 /* Release the resources used by REQ. */
 362
 363 static void
 364 request_free (struct request *req)
 365 {
 366   int i;
 367   xfree_null (req->arg);
 368   for (i = 0; i < req->hcount; i++)
 369     release_header (&req->headers[i]);
 370   xfree_null (req->headers);
 371   xfree (req);
 372 }
 373
 374 /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
 375    PROMISED_SIZE bytes are sent over the wire -- if the file is
 376    longer, read only that much; if the file is shorter, report an error.  */
 377
 378 static int
 379 post_file (int sock, const char *file_name, wgint promised_size)
 380 {
 381   static char chunk[8192];
 382   wgint written = 0;
 383   int write_error;
 384   FILE *fp;
 385
 386   DEBUGP (("[writing POST file %s ... ", file_name));
 387
 388   fp = fopen (file_name, "rb");
 389   if (!fp)
 390     return -1;
 391   while (!feof (fp) && written < promised_size)
 392     {
 393       int towrite;
 394       int length = fread (chunk, 1, sizeof (chunk), fp);
 395       if (length == 0)
 396         break;
 397       towrite = MIN (promised_size - written, length);
 398       write_error = fd_write (sock, chunk, towrite, -1);
 399       if (write_error < 0)
 400         {
 401           fclose (fp);
 402           return -1;
 403         }
 404       written += towrite;
 405     }
 406   fclose (fp);
 407
 408   /* If we've written less than was promised, report a (probably
 409      nonsensical) error rather than break the promise.  */
 410   if (written < promised_size)
 411     {
 412       errno = EINVAL;
 413       return -1;
 414     }
 415
 416   assert (written == promised_size);
 417   DEBUGP (("done]\n"));
 418   return 0;
 419 }
 420 \f
 421 static const char *
 422 response_head_terminator (const char *hunk, int oldlen, int peeklen)
 423 {
 424   const char *start, *end;
 425
 426   /* If at first peek, verify whether HUNK starts with "HTTP".  If
 427      not, this is a HTTP/0.9 request and we must bail out without
 428      reading anything.  */
 429   if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
 430     return hunk;
 431
 432   if (oldlen < 4)
 433     start = hunk;
 434   else
 435     start = hunk + oldlen - 4;
 436   end = hunk + oldlen + peeklen;
 437
 438   for (; start < end - 1; start++)
 439     if (*start == '\n')
 440       {
 441         if (start < end - 2
 442             && start[1] == '\r'
 443             && start[2] == '\n')
 444           return start + 3;
 445         if (start[1] == '\n')
 446           return start + 2;
 447       }
 448   return NULL;
 449 }
 450
 451 /* The maximum size of a single HTTP response we care to read.  This
 452    is not meant to impose an arbitrary limit, but to protect the user
 453    from Wget slurping up available memory upon encountering malicious
 454    or buggy server output.  Define it to 0 to remove the limit.  */
 455
 456 #define HTTP_RESPONSE_MAX_SIZE 65536
 457
 458 /* Read the HTTP request head from FD and return it.  The error
 459    conditions are the same as with fd_read_hunk.
 460
 461    To support HTTP/0.9 responses, this function tries to make sure
 462    that the data begins with "HTTP".  If this is not the case, no data
 463    is read and an empty request is returned, so that the remaining
 464    data can be treated as body.  */
 465
 466 static char *
 467 read_http_response_head (int fd)
 468 {
 469   return fd_read_hunk (fd, response_head_terminator, 512,
 470                        HTTP_RESPONSE_MAX_SIZE);
 471 }
 472
/* A parsed view of an HTTP response head.  The structure does not
   own the text: resp_free frees only the HEADERS array.  */
struct response {
  /* The response data. */
  const char *data;

  /* The array of pointers that indicate where each header starts.
     For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     The headers are located like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
     ^                   ^                             ^          ^
     headers[0]          headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response (the
     status line), headers[1] points to the end of the status line
     and the beginning of the first header, etc.  The array is
     terminated by a NULL pointer.  For a headerless (HTTP/0.9)
     response, resp_new leaves HEADERS itself NULL.  */

  const char **headers;
};
 497
 498 /* Create a new response object from the text of the HTTP response,
 499    available in HEAD.  That text is automatically split into
 500    constituent header lines for fast retrieval using
 501    resp_header_*.  */
 502
 503 static struct response *
 504 resp_new (const char *head)
 505 {
 506   const char *hdr;
 507   int count, size;
 508
 509   struct response *resp = xnew0 (struct response);
 510   resp->data = head;
 511
 512   if (*head == '\0')
 513     {
 514       /* Empty head means that we're dealing with a headerless
 515          (HTTP/0.9) response.  In that case, don't set HEADERS at
 516          all.  */
 517       return resp;
 518     }
 519
 520   /* Split HEAD into header lines, so that resp_header_* functions
 521      don't need to do this over and over again.  */
 522
 523   size = count = 0;
 524   hdr = head;
 525   while (1)
 526     {
 527       DO_REALLOC (resp->headers, size, count + 1, const char *);
 528       resp->headers[count++] = hdr;
 529
 530       /* Break upon encountering an empty line. */
 531       if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
 532         break;
 533
 534       /* Find the end of HDR, including continuations. */
 535       do
 536         {
 537           const char *end = strchr (hdr, '\n');
 538           if (end)
 539             hdr = end + 1;
 540           else
 541             hdr += strlen (hdr);
 542         }
 543       while (*hdr == ' ' || *hdr == '\t');
 544     }
 545   DO_REALLOC (resp->headers, size, count + 1, const char *);
 546   resp->headers[count] = NULL;
 547
 548   return resp;
 549 }
 550
 551 /* Locate the header named NAME in the request data, starting with
 552    position START.  This allows the code to loop through the request
 553    data, filtering for all requests of a given name.  Returns the
 554    found position, or -1 for failure.  The code that uses this
 555    function typically looks like this:
 556
 557      for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
 558        ... do something with header ...
 559
 560    If you only care about one header, use resp_header_get instead of
 561    this function.  */
 562
 563 static int
 564 resp_header_locate (const struct response *resp, const char *name, int start,
 565                     const char **begptr, const char **endptr)
 566 {
 567   int i;
 568   const char **headers = resp->headers;
 569   int name_len;
 570
 571   if (!headers || !headers[1])
 572     return -1;
 573
 574   name_len = strlen (name);
 575   if (start > 0)
 576     i = start;
 577   else
 578     i = 1;
 579
 580   for (; headers[i + 1]; i++)
 581     {
 582       const char *b = headers[i];
 583       const char *e = headers[i + 1];
 584       if (e - b > name_len
 585           && b[name_len] == ':'
 586           && 0 == strncasecmp (b, name, name_len))
 587         {
 588           b += name_len + 1;
 589           while (b < e && ISSPACE (*b))
 590             ++b;
 591           while (b < e && ISSPACE (e[-1]))
 592             --e;
 593           *begptr = b;
 594           *endptr = e;
 595           return i;
 596         }
 597     }
 598   return -1;
 599 }
 600
 601 /* Find and retrieve the header named NAME in the request data.  If
 602    found, set *BEGPTR to its starting, and *ENDPTR to its ending
 603    position, and return 1.  Otherwise return 0.
 604
 605    This function is used as a building block for resp_header_copy
 606    and resp_header_strdup.  */
 607
 608 static int
 609 resp_header_get (const struct response *resp, const char *name,
 610                  const char **begptr, const char **endptr)
 611 {
 612   int pos = resp_header_locate (resp, name, 0, begptr, endptr);
 613   return pos != -1;
 614 }
 615
 616 /* Copy the response header named NAME to buffer BUF, no longer than
 617    BUFSIZE (BUFSIZE includes the terminating 0).  If the header
 618    exists, 1 is returned, otherwise 0.  If there should be no limit on
 619    the size of the header, use resp_header_strdup instead.
 620
 621    If BUFSIZE is 0, no data is copied, but the boolean indication of
 622    whether the header is present is still returned.  */
 623
 624 static int
 625 resp_header_copy (const struct response *resp, const char *name,
 626                   char *buf, int bufsize)
 627 {
 628   const char *b, *e;
 629   if (!resp_header_get (resp, name, &b, &e))
 630     return 0;
 631   if (bufsize)
 632     {
 633       int len = MIN (e - b, bufsize - 1);
 634       memcpy (buf, b, len);
 635       buf[len] = '\0';
 636     }
 637   return 1;
 638 }
 639
 640 /* Return the value of header named NAME in RESP, allocated with
 641    malloc.  If such a header does not exist in RESP, return NULL.  */
 642
 643 static char *
 644 resp_header_strdup (const struct response *resp, const char *name)
 645 {
 646   const char *b, *e;
 647   if (!resp_header_get (resp, name, &b, &e))
 648     return NULL;
 649   return strdupdelim (b, e);
 650 }
 651
 652 /* Parse the HTTP status line, which is of format:
 653
 654    HTTP-Version SP Status-Code SP Reason-Phrase
 655
 656    The function returns the status-code, or -1 if the status line
 657    appears malformed.  The pointer to "reason-phrase" message is
 658    returned in *MESSAGE.  */
 659
 660 static int
 661 resp_status (const struct response *resp, char **message)
 662 {
 663   int status;
 664   const char *p, *end;
 665
 666   if (!resp->headers)
 667     {
 668       /* For a HTTP/0.9 response, assume status 200. */
 669       if (message)
 670         *message = xstrdup (_("No headers, assuming HTTP/0.9"));
 671       return 200;
 672     }
 673
 674   p = resp->headers[0];
 675   end = resp->headers[1];
 676
 677   if (!end)
 678     return -1;
 679
 680   /* "HTTP" */
 681   if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
 682     return -1;
 683   p += 4;
 684
 685   /* Match the HTTP version.  This is optional because Gnutella
 686      servers have been reported to not specify HTTP version.  */
 687   if (p < end && *p == '/')
 688     {
 689       ++p;
 690       while (p < end && ISDIGIT (*p))
 691         ++p;
 692       if (p < end && *p == '.')
 693         ++p;
 694       while (p < end && ISDIGIT (*p))
 695         ++p;
 696     }
 697
 698   while (p < end && ISSPACE (*p))
 699     ++p;
 700   if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
 701     return -1;
 702
 703   status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
 704   p += 3;
 705
 706   if (message)
 707     {
 708       while (p < end && ISSPACE (*p))
 709         ++p;
 710       while (p < end && ISSPACE (end[-1]))
 711         --end;
 712       *message = strdupdelim (p, end);
 713     }
 714
 715   return status;
 716 }
 717
 718 /* Release the resources used by RESP.  */
 719
 720 static void
 721 resp_free (struct response *resp)
 722 {
 723   xfree_null (resp->headers);
 724   xfree (resp);
 725 }
 726
 727 /* Print the server response, line by line, omitting the trailing CRLF
 728    from individual header lines, and prefixed with PREFIX.  */
 729
 730 static void
 731 print_server_response (const struct response *resp, const char *prefix)
 732 {
 733   int i;
 734   if (!resp->headers)
 735     return;
 736   for (i = 0; resp->headers[i + 1]; i++)
 737     {
 738       const char *b = resp->headers[i];
 739       const char *e = resp->headers[i + 1];
 740       /* Skip CRLF */
 741       if (b < e && e[-1] == '\n')
 742         --e;
 743       if (b < e && e[-1] == '\r')
 744         --e;
 745       /* This is safe even on printfs with broken handling of "%.<n>s"
 746          because resp->headers ends with \0.  */
 747       logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
 748     }
 749 }
 750
 751 /* Parse the `Content-Range' header and extract the information it
 752    contains.  Returns 1 if successful, -1 otherwise.  */
 753 static int
 754 parse_content_range (const char *hdr, wgint *first_byte_ptr,
 755                      wgint *last_byte_ptr, wgint *entity_length_ptr)
 756 {
 757   wgint num;
 758
 759   /* Ancient versions of Netscape proxy server, presumably predating
 760      rfc2068, sent out `Content-Range' without the "bytes"
 761      specifier.  */
 762   if (!strncasecmp (hdr, "bytes", 5))
 763     {
 764       hdr += 5;
 765       /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
 766          HTTP spec. */
 767       if (*hdr == ':')
 768         ++hdr;
 769       while (ISSPACE (*hdr))
 770         ++hdr;
 771       if (!*hdr)
 772         return 0;
 773     }
 774   if (!ISDIGIT (*hdr))
 775     return 0;
 776   for (num = 0; ISDIGIT (*hdr); hdr++)
 777     num = 10 * num + (*hdr - '0');
 778   if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
 779     return 0;
 780   *first_byte_ptr = num;
 781   ++hdr;
 782   for (num = 0; ISDIGIT (*hdr); hdr++)
 783     num = 10 * num + (*hdr - '0');
 784   if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
 785     return 0;
 786   *last_byte_ptr = num;
 787   ++hdr;
 788   for (num = 0; ISDIGIT (*hdr); hdr++)
 789     num = 10 * num + (*hdr - '0');
 790   *entity_length_ptr = num;
 791   return 1;
 792 }
 793
 794 /* Read the body of the request, but don't store it anywhere and don't
 795    display a progress gauge.  This is useful for reading the bodies of
 796    administrative responses to which we will soon issue another
 797    request.  The response is not useful to the user, but reading it
 798    allows us to continue using the same connection to the server.
 799
 800    If reading fails, 0 is returned, non-zero otherwise.  In debug
 801    mode, the body is displayed for debugging purposes.  */
 802
 803 static int
 804 skip_short_body (int fd, wgint contlen)
 805 {
 806   enum {
 807     SKIP_SIZE = 512,            /* size of the download buffer */
 808     SKIP_THRESHOLD = 4096       /* the largest size we read */
 809   };
 810   char dlbuf[SKIP_SIZE + 1];
 811   dlbuf[SKIP_SIZE] = '\0';      /* so DEBUGP can safely print it */
 812
 813   /* We shouldn't get here with unknown contlen.  (This will change
 814      with HTTP/1.1, which supports "chunked" transfer.)  */
 815   assert (contlen != -1);
 816
 817   /* If the body is too large, it makes more sense to simply close the
 818      connection than to try to read the body.  */
 819   if (contlen > SKIP_THRESHOLD)
 820     return 0;
 821
 822   DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
 823
 824   while (contlen > 0)
 825     {
 826       int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
 827       if (ret <= 0)
 828         {
 829           /* Don't normally report the error since this is an
 830              optimization that should be invisible to the user.  */
 831           DEBUGP (("] aborting (%s).\n",
 832                    ret < 0 ? strerror (errno) : "EOF received"));
 833           return 0;
 834         }
 835       contlen -= ret;
 836       /* Safe even if %.*s bogusly expects terminating \0 because
 837          we've zero-terminated dlbuf above.  */
 838       DEBUGP (("%.*s", ret, dlbuf));
 839     }
 840
 841   DEBUGP (("] done.\n"));
 842   return 1;
 843 }
 844 \f
 845 /* Persistent connections.  Currently, we cache the most recently used
 846    connection as persistent, provided that the HTTP server agrees to
 847    make it such.  The persistence data is stored in the variables
 848    below.  Ideally, it should be possible to cache an arbitrary fixed
 849    number of these connections.  */
 850
/* Whether a persistent connection is active.  The contents of pconn
   below are meaningful only while this is non-zero.  */
static int pconn_active;

/* Data of the single cached persistent connection.  */
static struct {
  /* The socket of the connection.  */
  int socket;

  /* Host and port of the currently active persistent connection.
     HOST is a private copy, made by register_persistent and freed by
     invalidate_persistent.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  int ssl;

  /* Whether the connection was authorized.  This is only done by
     NTLM, which authorizes *connections* rather than individual
     requests.  (That practice is peculiar for HTTP, but it is a
     useful optimization.)  */
  int authorized;

#ifdef ENABLE_NTLM
  /* NTLM data of the current connection.  */
  struct ntlmdata ntlm;
#endif
} pconn;
 876
 877 /* Mark the persistent connection as invalid and free the resources it
 878    uses.  This is used by the CLOSE_* macros after they forcefully
 879    close a registered persistent connection.  */
 880
 881 static void
 882 invalidate_persistent (void)
 883 {
 884   DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
 885   pconn_active = 0;
 886   fd_close (pconn.socket);
 887   xfree (pconn.host);
 888   xzero (pconn);
 889 }
 890
 891 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
 892    persistent.  This will enable someone to use the same connection
 893    later.  In the context of HTTP, this must be called only AFTER the
 894    response has been received and the server has promised that the
 895    connection will remain alive.
 896
 897    If a previous connection was persistent, it is closed. */
 898
 899 static void
 900 register_persistent (const char *host, int port, int fd, int ssl)
 901 {
 902   if (pconn_active)
 903     {
 904       if (pconn.socket == fd)
 905         {
 906           /* The connection FD is already registered. */
 907           return;
 908         }
 909       else
 910         {
 911           /* The old persistent connection is still active; close it
 912              first.  This situation arises whenever a persistent
 913              connection exists, but we then connect to a different
 914              host, and try to register a persistent connection to that
 915              one.  */
 916           invalidate_persistent ();
 917         }
 918     }
 919
 920   pconn_active = 1;
 921   pconn.socket = fd;
 922   pconn.host = xstrdup (host);
 923   pconn.port = port;
 924   pconn.ssl = ssl;
 925   pconn.authorized = 0;
 926
 927   DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
 928 }
 929
 930 /* Return non-zero if a persistent connection is available for
 931    connecting to HOST:PORT.  */
 932
 933 static int
 934 persistent_available_p (const char *host, int port, int ssl,
 935                         int *host_lookup_failed)
 936 {
 937   /* First, check whether a persistent connection is active at all.  */
 938   if (!pconn_active)
 939     return 0;
 940
 941   /* If we want SSL and the last connection wasn't or vice versa,
 942      don't use it.  Checking for host and port is not enough because
 943      HTTP and HTTPS can apparently coexist on the same port.  */
 944   if (ssl != pconn.ssl)
 945     return 0;
 946
 947   /* If we're not connecting to the same port, we're not interested. */
 948   if (port != pconn.port)
 949     return 0;
 950
 951   /* If the host is the same, we're in business.  If not, there is
 952      still hope -- read below.  */
 953   if (0 != strcasecmp (host, pconn.host))
 954     {
 955       /* Check if pconn.socket is talking to HOST under another name.
 956          This happens often when both sites are virtual hosts
 957          distinguished only by name and served by the same network
 958          interface, and hence the same web server (possibly set up by
 959          the ISP and serving many different web sites).  This
 960          admittedly unconventional optimization does not contradict
 961          HTTP and works well with popular server software.  */
 962
 963       int found;
 964       ip_address ip;
 965       struct address_list *al;
 966
 967       if (ssl)
 968         /* Don't try to talk to two different SSL sites over the same
 969            secure connection!  (Besides, it's not clear that
 970            name-based virtual hosting is even possible with SSL.)  */
 971         return 0;
 972
 973       /* If pconn.socket's peer is one of the IP addresses HOST
 974          resolves to, pconn.socket is for all intents and purposes
 975          already talking to HOST.  */
 976
 977       if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
 978         {
 979           /* Can't get the peer's address -- something must be very
 980              wrong with the connection.  */
 981           invalidate_persistent ();
 982           return 0;
 983         }
 984       al = lookup_host (host, 0);
 985       if (!al)
 986         {
 987           *host_lookup_failed = 1;
 988           return 0;
 989         }
 990
 991       found = address_list_contains (al, &ip);
 992       address_list_release (al);
 993
 994       if (!found)
 995         return 0;
 996
 997       /* The persistent connection's peer address was found among the
 998          addresses HOST resolved to; therefore, pconn.sock is in fact
 999          already talking to HOST -- no need to reconnect.  */
1000     }
1001
1002   /* Finally, check whether the connection is still open.  This is
1003      important because most server implement a liberal (short) timeout
1004      on persistent connections.  Wget can of course always reconnect
1005      if the connection doesn't work out, but it's nicer to know in
1006      advance.  This test is a logical followup of the first test, but
1007      is "expensive" and therefore placed at the end of the list.  */
1008
1009   if (!test_socket_open (pconn.socket))
1010     {
1011       /* Oops, the socket is no longer open.  Now that we know that,
1012          let's invalidate the persistent connection before returning
1013          0.  */
1014       invalidate_persistent ();
1015       return 0;
1016     }
1017
1018   return 1;
1019 }
1020
/* Two ways of letting go of the connection FD:

   CLOSE_FINISH is for the case where the job we've been doing is
   finished and we just want to leave.  If `keep_alive' is set it
   leaves the connection open and FD untouched; otherwise the
   connection is given up.

   CLOSE_INVALIDATE is for the case where something is seriously
   wrong and the connection is being closed as part of cleanup.  It
   gives the connection up even when `keep_alive' is set.

   "Giving up" means calling invalidate_persistent when FD is the
   registered persistent socket, and fd_close (FD) otherwise.

   FD must be an lvalue.  CLOSE_INVALIDATE always stores -1 in it;
   CLOSE_FINISH stores -1 only on the fd_close path.  Both macros
   use `pconn_active' and `pconn' from the file, and CLOSE_FINISH
   additionally uses a variable named `keep_alive' that must be in
   scope where the macro is expanded.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".  */

#define CLOSE_FINISH(fd) do {                   \
  if (keep_alive)                               \
    break;                                      \
  if (pconn_active && (fd) == pconn.socket)     \
    invalidate_persistent ();                   \
  else                                          \
    {                                           \
      fd_close (fd);                            \
      fd = -1;                                  \
    }                                           \
} while (0)

#define CLOSE_INVALIDATE(fd) do {               \
  if (!pconn_active || (fd) != pconn.socket)    \
    fd_close (fd);                              \
  else                                          \
    invalidate_persistent ();                   \
  fd = -1;                                      \
} while (0)
1056 \f
/* State of one HTTP retrieval, passed to gethttp, which stores
   various HTTP parameters in it.  gethttp resets len, contlen, res,
   newloc, remote_time and error on entry; free_hstat releases the
   three heap-allocated strings (newloc, remote_time, error).  */
struct http_stat
{
  wgint len;                    /* received length; gethttp starts
                                   it at 0 */
  wgint contlen;                /* expected length; gethttp starts
                                   it at -1 */
  wgint restval;                /* the restart value; when non-zero,
                                   gethttp sends it in a
                                   "Range: bytes=N-" request header */
  int res;                      /* the result of last read; gethttp
                                   starts it at -1 */
  char *newloc;                 /* new location (redirection): a copy
                                   of the Location response header,
                                   or NULL */
  char *remote_time;            /* remote time-stamp string: a copy of
                                   the Last-Modified response header,
                                   or NULL */
  char *error;                  /* textual HTTP error: a copy of the
                                   status message or a placeholder */
  int statcode;                 /* status code; -1 stands for a
                                   malformed status line */
  wgint rd_size;                /* amount of data read from socket */
  double dltime;                /* time it took to download the data */
  const char *referer;          /* value of the referer header; not
                                   freed by free_hstat */
  char **local_file;            /* local file: pointer to the file name
                                   string; gethttp may xrealloc
                                   *local_file to append ".html" */
};
1072
1073 static void
1074 free_hstat (struct http_stat *hs)
1075 {
1076   xfree_null (hs->newloc);
1077   xfree_null (hs->remote_time);
1078   xfree_null (hs->error);
1079
1080   /* Guard against being called twice. */
1081   hs->newloc = NULL;
1082   hs->remote_time = NULL;
1083   hs->error = NULL;
1084 }
1085
/* Forward declarations of the authentication helpers gethttp relies
   on.  The parameter names are informational only and mirror the
   arguments gethttp passes at its call sites.  */
static char *basic_authentication_encode (const char *user,
                                          const char *passwd);
static int known_authentication_scheme_p (const char *hdrbeg,
                                          const char *hdrend);
static char *create_authorization_line (const char *au,
                                        const char *user,
                                        const char *passwd,
                                        const char *method,
                                        const char *path,
                                        int *finished);

/* Not static, unlike the helpers above.  */
time_t http_atotm (const char *);
1093
/* Non-zero if LINE begins, ignoring case, with STRING_CONSTANT and
   the match ends on a word boundary, i.e. the character after it is
   whitespace or the terminating NUL.

   STRING_CONSTANT must be a string literal (or a char array), because
   its length is computed with sizeof.  LINE is parenthesized so that
   a pointer expression such as `hdr + 2' expands correctly; it is
   still evaluated more than once, so it must not have side
   effects.  */
#define BEGINS_WITH(line, string_constant)                              \
  (!strncasecmp ((line), string_constant, sizeof (string_constant) - 1) \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                   \
       || !(line)[sizeof (string_constant) - 1]))
1098
/* Give REQ a "User-Agent" header according to opt.useragent:
     - NULL: send "Wget/" followed by version_string; the value is
       built with aprintf and passed with rel_value;
     - a non-empty string: send that string as is, passed with
       rel_none;
     - an empty string: set no User-Agent header at all.  */
#define SET_USER_AGENT(req) do {                                        \
  if (opt.useragent)                                                    \
    {                                                                   \
      if (*opt.useragent)                                               \
        request_set_header (req, "User-Agent", opt.useragent, rel_none); \
    }                                                                   \
  else                                                                  \
    request_set_header (req, "User-Agent",                              \
                        aprintf ("Wget/%s", version_string), rel_value); \
} while (0)
1106
/* Non-zero when any of the options listed below is set.  These are
   the flags that allow clobbering the file (opening it with "wb");
   the list is spelled out once here so it need not be repeated
   later.  #### This will require rework.  */
#define ALLOW_CLOBBER (opt.noclobber            \
                       || opt.always_rest       \
                       || opt.timestamping      \
                       || opt.dirstruct         \
                       || opt.output_document)
1112
1113 /* Retrieve a document through HTTP protocol.  It recognizes status
1114    code, and correctly handles redirections.  It closes the network
1115    socket.  If it receives an error from the functions below it, it
1116    will print it if there is enough information to do so (almost
1117    always), returning the error to the caller (i.e. http_loop).
1118
1119    Various HTTP parameters are stored to hs.
1120
1121    If PROXY is non-NULL, the connection will be made to the proxy
1122    server, and u->url will be requested.  */
1123 static uerr_t
1124 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1125 {
1126   struct request *req;
1127
1128   char *type;
1129   char *user, *passwd;
1130   char *proxyauth;
1131   int statcode;
1132   int write_error;
1133   wgint contlen, contrange;
1134   struct url *conn;
1135   FILE *fp;
1136
1137   int sock = -1;
1138   int flags;
1139
1140   /* Set to 1 when the authorization has failed permanently and should
1141      not be tried again. */
1142   int auth_finished = 0;
1143
1144   /* Whether NTLM authentication is used for this request. */
1145   int ntlm_seen = 0;
1146
1147   /* Whether our connection to the remote host is through SSL.  */
1148   int using_ssl = 0;
1149
1150   /* Whether a HEAD request will be issued (as opposed to GET or
1151      POST). */
1152   int head_only = *dt & HEAD_ONLY;
1153
1154   char *head;
1155   struct response *resp;
1156   char hdrval[256];
1157   char *message;
1158
1159   /* Whether this connection will be kept alive after the HTTP request
1160      is done. */
1161   int keep_alive;
1162
1163   /* Whether keep-alive should be inhibited.
1164
1165      RFC 2068 requests that 1.0 clients not send keep-alive requests
1166      to proxies.  This is because many 1.0 proxies do not interpret
1167      the Connection header and transfer it to the remote server,
1168      causing it to not close the connection and leave both the proxy
1169      and the client hanging.  */
1170   int inhibit_keep_alive =
1171     !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1172
1173   /* Headers sent when using POST. */
1174   wgint post_data_size = 0;
1175
1176   int host_lookup_failed = 0;
1177
1178 #ifdef HAVE_SSL
1179   if (u->scheme == SCHEME_HTTPS)
1180     {
1181       /* Initialize the SSL context.  After this has once been done,
1182          it becomes a no-op.  */
1183       if (!ssl_init ())
1184         {
1185           scheme_disable (SCHEME_HTTPS);
1186           logprintf (LOG_NOTQUIET,
1187                      _("Disabling SSL due to encountered errors.\n"));
1188           return SSLINITFAILED;
1189         }
1190     }
1191 #endif /* HAVE_SSL */
1192
1193   if (!head_only)
1194     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1195        know the local filename so we can save to it. */
1196     assert (*hs->local_file != NULL);
1197
1198   /* Initialize certain elements of struct http_stat.  */
1199   hs->len = 0;
1200   hs->contlen = -1;
1201   hs->res = -1;
1202   hs->newloc = NULL;
1203   hs->remote_time = NULL;
1204   hs->error = NULL;
1205
1206   conn = u;
1207
1208   /* Prepare the request to send. */
1209
1210   req = request_new ();
1211   {
1212     char *meth_arg;
1213     const char *meth = "GET";
1214     if (head_only)
1215       meth = "HEAD";
1216     else if (opt.post_file_name || opt.post_data)
1217       meth = "POST";
1218     /* Use the full path, i.e. one that includes the leading slash and
1219        the query string.  E.g. if u->path is "foo/bar" and u->query is
1220        "param=value", full_path will be "/foo/bar?param=value".  */
1221     if (proxy
1222 #ifdef HAVE_SSL
1223         /* When using SSL over proxy, CONNECT establishes a direct
1224            connection to the HTTPS server.  Therefore use the same
1225            argument as when talking to the server directly. */
1226         && u->scheme != SCHEME_HTTPS
1227 #endif
1228         )
1229       meth_arg = xstrdup (u->url);
1230     else
1231       meth_arg = url_full_path (u);
1232     request_set_method (req, meth, meth_arg);
1233   }
1234
1235   request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1236   if (*dt & SEND_NOCACHE)
1237     request_set_header (req, "Pragma", "no-cache", rel_none);
1238   if (hs->restval)
1239     request_set_header (req, "Range",
1240                         aprintf ("bytes=%s-",
1241                                  number_to_static_string (hs->restval)),
1242                         rel_value);
1243   SET_USER_AGENT (req);
1244   request_set_header (req, "Accept", "*/*", rel_none);
1245
1246   /* Find the username and password for authentication. */
1247   user = u->user;
1248   passwd = u->passwd;
1249   search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1250   user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1251   passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1252
1253   if (user && passwd)
1254     {
1255       /* We have the username and the password, but haven't tried
1256          any authorization yet.  Let's see if the "Basic" method
1257          works.  If not, we'll come back here and construct a
1258          proper authorization method with the right challenges.
1259
1260          If we didn't employ this kind of logic, every URL that
1261          requires authorization would have to be processed twice,
1262          which is very suboptimal and generates a bunch of false
1263          "unauthorized" errors in the server log.
1264
1265          #### But this logic also has a serious problem when used
1266          with stronger authentications: we *first* transmit the
1267          username and the password in clear text, and *then* attempt a
1268          stronger authentication scheme.  That cannot be right!  We
1269          are only fortunate that almost everyone still uses the
1270          `Basic' scheme anyway.
1271
1272          There should be an option to prevent this from happening, for
1273          those who use strong authentication schemes and value their
1274          passwords.  */
1275       request_set_header (req, "Authorization",
1276                           basic_authentication_encode (user, passwd),
1277                           rel_value);
1278     }
1279
1280   proxyauth = NULL;
1281   if (proxy)
1282     {
1283       char *proxy_user, *proxy_passwd;
1284       /* For normal username and password, URL components override
1285          command-line/wgetrc parameters.  With proxy
1286          authentication, it's the reverse, because proxy URLs are
1287          normally the "permanent" ones, so command-line args
1288          should take precedence.  */
1289       if (opt.proxy_user && opt.proxy_passwd)
1290         {
1291           proxy_user = opt.proxy_user;
1292           proxy_passwd = opt.proxy_passwd;
1293         }
1294       else
1295         {
1296           proxy_user = proxy->user;
1297           proxy_passwd = proxy->passwd;
1298         }
1299       /* #### This does not appear right.  Can't the proxy request,
1300          say, `Digest' authentication?  */
1301       if (proxy_user && proxy_passwd)
1302         proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1303
1304       /* If we're using a proxy, we will be connecting to the proxy
1305          server.  */
1306       conn = proxy;
1307
1308       /* Proxy authorization over SSL is handled below. */
1309 #ifdef HAVE_SSL
1310       if (u->scheme != SCHEME_HTTPS)
1311 #endif
1312         request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1313     }
1314
1315   {
1316     /* Whether we need to print the host header with braces around
1317        host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1318        usual "Host: symbolic-name:1234". */
1319     int squares = strchr (u->host, ':') != NULL;
1320     if (u->port == scheme_default_port (u->scheme))
1321       request_set_header (req, "Host",
1322                           aprintf (squares ? "[%s]" : "%s", u->host),
1323                           rel_value);
1324     else
1325       request_set_header (req, "Host",
1326                           aprintf (squares ? "[%s]:%d" : "%s:%d",
1327                                    u->host, u->port),
1328                           rel_value);
1329   }
1330
1331   if (!inhibit_keep_alive)
1332     request_set_header (req, "Connection", "Keep-Alive", rel_none);
1333
1334   if (opt.cookies)
1335     request_set_header (req, "Cookie",
1336                         cookie_header (wget_cookie_jar,
1337                                        u->host, u->port, u->path,
1338 #ifdef HAVE_SSL
1339                                        u->scheme == SCHEME_HTTPS
1340 #else
1341                                        0
1342 #endif
1343                                        ),
1344                         rel_value);
1345
1346   if (opt.post_data || opt.post_file_name)
1347     {
1348       request_set_header (req, "Content-Type",
1349                           "application/x-www-form-urlencoded", rel_none);
1350       if (opt.post_data)
1351         post_data_size = strlen (opt.post_data);
1352       else
1353         {
1354           post_data_size = file_size (opt.post_file_name);
1355           if (post_data_size == -1)
1356             {
1357               logprintf (LOG_NOTQUIET, _("POST data file missing: %s (%s)\n"),
1358                          opt.post_file_name, strerror (errno));
1359               post_data_size = 0;
1360             }
1361         }
1362       request_set_header (req, "Content-Length",
1363                           xstrdup (number_to_static_string (post_data_size)),
1364                           rel_value);
1365     }
1366
1367   /* Add the user headers. */
1368   if (opt.user_headers)
1369     {
1370       int i;
1371       for (i = 0; opt.user_headers[i]; i++)
1372         request_set_user_header (req, opt.user_headers[i]);
1373     }
1374
1375  retry_with_auth:
1376   /* We need to come back here when the initial attempt to retrieve
1377      without authorization header fails.  (Expected to happen at least
1378      for the Digest authorization scheme.)  */
1379
1380   keep_alive = 0;
1381
1382   /* Establish the connection.  */
1383
1384   if (!inhibit_keep_alive)
1385     {
1386       /* Look for a persistent connection to target host, unless a
1387          proxy is used.  The exception is when SSL is in use, in which
1388          case the proxy is nothing but a passthrough to the target
1389          host, registered as a connection to the latter.  */
1390       struct url *relevant = conn;
1391 #ifdef HAVE_SSL
1392       if (u->scheme == SCHEME_HTTPS)
1393         relevant = u;
1394 #endif
1395
1396       if (persistent_available_p (relevant->host, relevant->port,
1397 #ifdef HAVE_SSL
1398                                   relevant->scheme == SCHEME_HTTPS,
1399 #else
1400                                   0,
1401 #endif
1402                                   &host_lookup_failed))
1403         {
1404           sock = pconn.socket;
1405           using_ssl = pconn.ssl;
1406           logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1407                      escnonprint (pconn.host), pconn.port);
1408           DEBUGP (("Reusing fd %d.\n", sock));
1409           if (pconn.authorized)
1410             /* If the connection is already authorized, the "Basic"
1411                authorization added by code above is unnecessary and
1412                only hurts us.  */
1413             request_remove_header (req, "Authorization");
1414         }
1415     }
1416
1417   if (sock < 0)
1418     {
1419       /* In its current implementation, persistent_available_p will
1420          look up conn->host in some cases.  If that lookup failed, we
1421          don't need to bother with connect_to_host.  */
1422       if (host_lookup_failed)
1423         {
1424           request_free (req);
1425           return HOSTERR;
1426         }
1427
1428       sock = connect_to_host (conn->host, conn->port);
1429       if (sock == E_HOST)
1430         {
1431           request_free (req);
1432           return HOSTERR;
1433         }
1434       else if (sock < 0)
1435         {
1436           request_free (req);
1437           return (retryable_socket_connect_error (errno)
1438                   ? CONERROR : CONIMPOSSIBLE);
1439         }
1440
1441 #ifdef HAVE_SSL
1442       if (proxy && u->scheme == SCHEME_HTTPS)
1443         {
1444           /* When requesting SSL URLs through proxies, use the
1445              CONNECT method to request passthrough.  */
1446           struct request *connreq = request_new ();
1447           request_set_method (connreq, "CONNECT",
1448                               aprintf ("%s:%d", u->host, u->port));
1449           SET_USER_AGENT (connreq);
1450           if (proxyauth)
1451             {
1452               request_set_header (connreq, "Proxy-Authorization",
1453                                   proxyauth, rel_value);
1454               /* Now that PROXYAUTH is part of the CONNECT request,
1455                  zero it out so we don't send proxy authorization with
1456                  the regular request below.  */
1457               proxyauth = NULL;
1458             }
1459           /* Examples in rfc2817 use the Host header in CONNECT
1460              requests.  I don't see how that gains anything, given
1461              that the contents of Host would be exactly the same as
1462              the contents of CONNECT.  */
1463
1464           write_error = request_send (connreq, sock);
1465           request_free (connreq);
1466           if (write_error < 0)
1467             {
1468               logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1469                          strerror (errno));
1470               CLOSE_INVALIDATE (sock);
1471               return WRITEFAILED;
1472             }
1473
1474           head = read_http_response_head (sock);
1475           if (!head)
1476             {
1477               logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1478                          strerror (errno));
1479               CLOSE_INVALIDATE (sock);
1480               return HERR;
1481             }
1482           message = NULL;
1483           if (!*head)
1484             {
1485               xfree (head);
1486               goto failed_tunnel;
1487             }
1488           DEBUGP (("proxy responded with: [%s]\n", head));
1489
1490           resp = resp_new (head);
1491           statcode = resp_status (resp, &message);
1492           resp_free (resp);
1493           xfree (head);
1494           if (statcode != 200)
1495             {
1496             failed_tunnel:
1497               logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1498                          message ? escnonprint (message) : "?");
1499               xfree_null (message);
1500               return CONSSLERR;
1501             }
1502           xfree_null (message);
1503
1504           /* SOCK is now *really* connected to u->host, so update CONN
1505              to reflect this.  That way register_persistent will
1506              register SOCK as being connected to u->host:u->port.  */
1507           conn = u;
1508         }
1509
1510       if (conn->scheme == SCHEME_HTTPS)
1511         {
1512           if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1513             {
1514               fd_close (sock);
1515               return CONSSLERR;
1516             }
1517           using_ssl = 1;
1518         }
1519 #endif /* HAVE_SSL */
1520     }
1521
1522   /* Send the request to server.  */
1523   write_error = request_send (req, sock);
1524
1525   if (write_error >= 0)
1526     {
1527       if (opt.post_data)
1528         {
1529           DEBUGP (("[POST data: %s]\n", opt.post_data));
1530           write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1531         }
1532       else if (opt.post_file_name && post_data_size != 0)
1533         write_error = post_file (sock, opt.post_file_name, post_data_size);
1534     }
1535
1536   if (write_error < 0)
1537     {
1538       logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1539                  strerror (errno));
1540       CLOSE_INVALIDATE (sock);
1541       request_free (req);
1542       return WRITEFAILED;
1543     }
1544   logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1545              proxy ? "Proxy" : "HTTP");
1546   contlen = -1;
1547   contrange = 0;
1548   *dt &= ~RETROKF;
1549
1550   head = read_http_response_head (sock);
1551   if (!head)
1552     {
1553       if (errno == 0)
1554         {
1555           logputs (LOG_NOTQUIET, _("No data received.\n"));
1556           CLOSE_INVALIDATE (sock);
1557           request_free (req);
1558           return HEOF;
1559         }
1560       else
1561         {
1562           logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1563                      strerror (errno));
1564           CLOSE_INVALIDATE (sock);
1565           request_free (req);
1566           return HERR;
1567         }
1568     }
1569   DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1570
1571   resp = resp_new (head);
1572
1573   /* Check for status line.  */
1574   message = NULL;
1575   statcode = resp_status (resp, &message);
1576   if (!opt.server_response)
1577     logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1578                message ? escnonprint (message) : "");
1579   else
1580     {
1581       logprintf (LOG_VERBOSE, "\n");
1582       print_server_response (resp, "  ");
1583     }
1584
1585   if (!opt.ignore_length
1586       && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1587     {
1588       wgint parsed;
1589       errno = 0;
1590       parsed = str_to_wgint (hdrval, NULL, 10);
1591       if (parsed == WGINT_MAX && errno == ERANGE)
1592         /* Out of range.
1593            #### If Content-Length is out of range, it most likely
1594            means that the file is larger than 2G and that we're
1595            compiled without LFS.  In that case we should probably
1596            refuse to even attempt to download the file.  */
1597         contlen = -1;
1598       else
1599         contlen = parsed;
1600     }
1601
1602   /* Check for keep-alive related responses. */
1603   if (!inhibit_keep_alive && contlen != -1)
1604     {
1605       if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1606         keep_alive = 1;
1607       else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1608         {
1609           if (0 == strcasecmp (hdrval, "Keep-Alive"))
1610             keep_alive = 1;
1611         }
1612     }
1613   if (keep_alive)
1614     /* The server has promised that it will not close the connection
1615        when we're done.  This means that we can register it.  */
1616     register_persistent (conn->host, conn->port, sock, using_ssl);
1617
1618   if (statcode == HTTP_STATUS_UNAUTHORIZED)
1619     {
1620       /* Authorization is required.  */
1621       if (keep_alive && !head_only && skip_short_body (sock, contlen))
1622         CLOSE_FINISH (sock);
1623       else
1624         CLOSE_INVALIDATE (sock);
1625       pconn.authorized = 0;
1626       if (!auth_finished && (user && passwd))
1627         {
1628           /* IIS sends multiple copies of WWW-Authenticate, one with
1629              the value "negotiate", and other(s) with data.  Loop over
1630              all the occurrences and pick the one we recognize.  */
1631           int wapos;
1632           const char *wabeg, *waend;
1633           char *www_authenticate = NULL;
1634           for (wapos = 0;
1635                (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1636                                             &wabeg, &waend)) != -1;
1637                ++wapos)
1638             if (known_authentication_scheme_p (wabeg, waend))
1639               {
1640                 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1641                 break;
1642               }
1643
1644           if (!www_authenticate)
1645             /* If the authentication header is missing or
1646                unrecognized, there's no sense in retrying.  */
1647             logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1648           else if (BEGINS_WITH (www_authenticate, "Basic"))
1649             /* If the authentication scheme is "Basic", which we send
1650                by default, there's no sense in retrying either.  (This
1651                should be changed when we stop sending "Basic" data by
1652                default.)  */
1653             ;
1654           else
1655             {
1656               char *pth;
1657               pth = url_full_path (u);
1658               request_set_header (req, "Authorization",
1659                                   create_authorization_line (www_authenticate,
1660                                                              user, passwd,
1661                                                              request_method (req),
1662                                                              pth,
1663                                                              &auth_finished),
1664                                   rel_value);
1665               if (BEGINS_WITH (www_authenticate, "NTLM"))
1666                 ntlm_seen = 1;
1667               xfree (pth);
1668               goto retry_with_auth;
1669             }
1670         }
1671       logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1672       request_free (req);
1673       return AUTHFAILED;
1674     }
1675   else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1676     {
1677       /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1678       if (ntlm_seen)
1679         pconn.authorized = 1;
1680     }
1681   request_free (req);
1682
1683   hs->statcode = statcode;
1684   if (statcode == -1)
1685     hs->error = xstrdup (_("Malformed status line"));
1686   else if (!*message)
1687     hs->error = xstrdup (_("(no description)"));
1688   else
1689     hs->error = xstrdup (message);
1690   xfree (message);
1691
1692   type = resp_header_strdup (resp, "Content-Type");
1693   if (type)
1694     {
1695       char *tmp = strchr (type, ';');
1696       if (tmp)
1697         {
1698           while (tmp > type && ISSPACE (tmp[-1]))
1699             --tmp;
1700           *tmp = '\0';
1701         }
1702     }
1703   hs->newloc = resp_header_strdup (resp, "Location");
1704   hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1705
1706   /* Handle (possibly multiple instances of) the Set-Cookie header. */
1707   {
1708     char *pth = NULL;
1709     int scpos;
1710     const char *scbeg, *scend;
1711     /* The jar should have been created by now. */
1712     assert (wget_cookie_jar != NULL);
1713     for (scpos = 0;
1714          (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1715                                       &scbeg, &scend)) != -1;
1716          ++scpos)
1717       {
1718         char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1719         if (pth == NULL)
1720           {
1721             /* u->path doesn't begin with /, which cookies.c expects. */
1722             pth = (char *) alloca (1 + strlen (u->path) + 1);
1723             pth[0] = '/';
1724             strcpy (pth + 1, u->path);
1725           }
1726         cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth,
1727                                   set_cookie);
1728       }
1729   }
1730
1731   if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1732     {
1733       wgint first_byte_pos, last_byte_pos, entity_length;
1734       if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1735                                &entity_length))
1736         contrange = first_byte_pos;
1737     }
1738   resp_free (resp);
1739
1740   /* 20x responses are counted among successful by default.  */
1741   if (H_20X (statcode))
1742     *dt |= RETROKF;
1743
1744   /* Return if redirected.  */
1745   if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1746     {
1747       /* RFC2068 says that in case of the 300 (multiple choices)
1748          response, the server can output a preferred URL through
1749          `Location' header; otherwise, the request should be treated
1750          like GET.  So, if the location is set, it will be a
1751          redirection; otherwise, just proceed normally.  */
1752       if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1753         *dt |= RETROKF;
1754       else
1755         {
1756           logprintf (LOG_VERBOSE,
1757                      _("Location: %s%s\n"),
1758                      hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
1759                      hs->newloc ? _(" [following]") : "");
1760           if (keep_alive && !head_only && skip_short_body (sock, contlen))
1761             CLOSE_FINISH (sock);
1762           else
1763             CLOSE_INVALIDATE (sock);
1764           xfree_null (type);
1765           return NEWLOCATION;
1766         }
1767     }
1768
1769   /* If content-type is not given, assume text/html.  This is because
1770      of the multitude of broken CGI's that "forget" to generate the
1771      content-type.  */
1772   if (!type ||
1773         0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1774         0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1775     *dt |= TEXTHTML;
1776   else
1777     *dt &= ~TEXTHTML;
1778
1779   if (opt.html_extension && (*dt & TEXTHTML))
1780     /* -E / --html-extension / html_extension = on was specified, and this is a
1781        text/html file.  If some case-insensitive variation on ".htm[l]" isn't
1782        already the file's suffix, tack on ".html". */
1783     {
1784       char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
1785
1786       if (last_period_in_local_filename == NULL
1787           || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1788                || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1789         {
1790           int local_filename_len = strlen (*hs->local_file);
1791           /* Resize the local file, allowing for ".html" preceded by
1792              optional ".NUMBER".  */
1793           *hs->local_file = xrealloc (*hs->local_file,
1794                                       local_filename_len + 24 + sizeof (".html"));
1795           strcpy(*hs->local_file + local_filename_len, ".html");
1796           /* If clobbering is not allowed and the file, as named,
1797              exists, tack on ".NUMBER.html" instead. */
1798           if (!ALLOW_CLOBBER)
1799             {
1800               int ext_num = 1;
1801               do
1802                 sprintf (*hs->local_file + local_filename_len,
1803                          ".%d.html", ext_num++);
1804               while (file_exists_p (*hs->local_file));
1805             }
1806           *dt |= ADDED_HTML_EXTENSION;
1807         }
1808     }
1809
1810   if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
1811     {
1812       /* If `-c' is in use and the file has been fully downloaded (or
1813          the remote file has shrunk), Wget effectively requests bytes
1814          after the end of file and the server response with 416.  */
1815       logputs (LOG_VERBOSE, _("\
1816 \n    The file is already fully retrieved; nothing to do.\n\n"));
1817       /* In case the caller inspects. */
1818       hs->len = contlen;
1819       hs->res = 0;
1820       /* Mark as successfully retrieved. */
1821       *dt |= RETROKF;
1822       xfree_null (type);
1823       CLOSE_INVALIDATE (sock);  /* would be CLOSE_FINISH, but there
1824                                    might be more bytes in the body. */
1825       return RETRUNNEEDED;
1826     }
1827   if ((contrange != 0 && contrange != hs->restval)
1828       || (H_PARTIAL (statcode) && !contrange))
1829     {
1830       /* The Range request was somehow misunderstood by the server.
1831          Bail out.  */
1832       xfree_null (type);
1833       CLOSE_INVALIDATE (sock);
1834       return RANGEERR;
1835     }
1836   hs->contlen = contlen + contrange;
1837
1838   if (opt.verbose)
1839     {
1840       if (*dt & RETROKF)
1841         {
1842           /* No need to print this output if the body won't be
1843              downloaded at all, or if the original server response is
1844              printed.  */
1845           logputs (LOG_VERBOSE, _("Length: "));
1846           if (contlen != -1)
1847             {
1848               logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange));
1849               if (contlen + contrange >= 1024)
1850                 logprintf (LOG_VERBOSE, " (%s)",
1851                            human_readable (contlen + contrange));
1852               if (contrange)
1853                 {
1854                   if (contlen >= 1024)
1855                     logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
1856                                with_thousand_seps (contlen),
1857                                human_readable (contlen));
1858                   else
1859                     logprintf (LOG_VERBOSE, _(", %s remaining"),
1860                                with_thousand_seps (contlen));
1861                 }
1862             }
1863           else
1864             logputs (LOG_VERBOSE,
1865                      opt.ignore_length ? _("ignored") : _("unspecified"));
1866           if (type)
1867             logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
1868           else
1869             logputs (LOG_VERBOSE, "\n");
1870         }
1871     }
1872   xfree_null (type);
1873   type = NULL;                  /* We don't need it any more.  */
1874
1875   /* Return if we have no intention of further downloading.  */
1876   if (!(*dt & RETROKF) || head_only)
1877     {
1878       /* In case the caller cares to look...  */
1879       hs->len = 0;
1880       hs->res = 0;
1881       xfree_null (type);
1882       /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
1883          servers not to send body in response to a HEAD request.  If
1884          you encounter such a server (more likely a broken CGI), use
1885          `--no-http-keep-alive'.  */
1886       CLOSE_FINISH (sock);
1887       return RETRFINISHED;
1888     }
1889
1890   /* Open the local file.  */
1891   if (!output_stream)
1892     {
1893       mkalldirs (*hs->local_file);
1894       if (opt.backups)
1895         rotate_backups (*hs->local_file);
1896       if (hs->restval)
1897         fp = fopen (*hs->local_file, "ab");
1898       else if (ALLOW_CLOBBER)
1899         fp = fopen (*hs->local_file, "wb");
1900       else
1901         {
1902           fp = fopen_excl (*hs->local_file, 1);
1903           if (!fp && errno == EEXIST)
1904             {
1905               /* We cannot just invent a new name and use it (which is
1906                  what functions like unique_create typically do)
1907                  because we told the user we'd use this name.
1908                  Instead, return and retry the download.  */
1909               logprintf (LOG_NOTQUIET,
1910                          _("%s has sprung into existence.\n"),
1911                          *hs->local_file);
1912               CLOSE_INVALIDATE (sock);
1913               return FOPEN_EXCL_ERR;
1914             }
1915         }
1916       if (!fp)
1917         {
1918           logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1919           CLOSE_INVALIDATE (sock);
1920           return FOPENERR;
1921         }
1922     }
1923   else
1924     fp = output_stream;
1925
1926   /* #### This confuses the timestamping code that checks for file
1927      size.  Maybe we should save some additional information?  */
1928   if (opt.save_headers)
1929     fwrite (head, 1, strlen (head), fp);
1930
1931   /* Now we no longer need to store the response header. */
1932   xfree (head);
1933
1934   /* Download the request body.  */
1935   flags = 0;
1936   if (keep_alive)
1937     flags |= rb_read_exactly;
1938   if (hs->restval > 0 && contrange == 0)
1939     /* If the server ignored our range request, instruct fd_read_body
1940        to skip the first RESTVAL bytes of body.  */
1941     flags |= rb_skip_startpos;
1942   hs->len = hs->restval;
1943   hs->rd_size = 0;
1944   hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
1945                           hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
1946                           flags);
1947
1948   if (hs->res >= 0)
1949     CLOSE_FINISH (sock);
1950   else
1951     CLOSE_INVALIDATE (sock);
1952
1953   {
1954     /* Close or flush the file.  We have to be careful to check for
1955        error here.  Checking the result of fwrite() is not enough --
1956        errors could go unnoticed!  */
1957     int flush_res;
1958     if (!output_stream)
1959       flush_res = fclose (fp);
1960     else
1961       flush_res = fflush (fp);
1962     if (flush_res == EOF)
1963       hs->res = -2;
1964   }
1965   if (hs->res == -2)
1966     return FWRITEERR;
1967   return RETRFINISHED;
1968 }
1969
1970 /* The genuine HTTP loop!  This is the part where the retrieval is
1971    retried, and retried, and retried, and...  */
1972 uerr_t
1973 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1974            int *dt, struct url *proxy)
1975 {
1976   int count;
1977   int use_ts, got_head = 0;     /* time-stamping info */
1978   char *filename_plus_orig_suffix;
1979   char *local_filename = NULL;
1980   char *tms, *locf, *tmrate;
1981   uerr_t err;
1982   time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
1983   wgint local_size = 0;         /* the size of the local file */
1984   size_t filename_len;
1985   struct http_stat hstat;       /* HTTP status */
1986   struct_stat st;
1987   char *dummy = NULL;
1988
1989   /* This used to be done in main(), but it's a better idea to do it
1990      here so that we don't go through the hoops if we're just using
1991      FTP or whatever. */
1992   if (opt.cookies)
1993     {
1994       if (!wget_cookie_jar)
1995         wget_cookie_jar = cookie_jar_new ();
1996       if (opt.cookies_input && !cookies_loaded_p)
1997         {
1998           cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1999           cookies_loaded_p = 1;
2000         }
2001     }
2002
2003   *newloc = NULL;
2004
2005   /* Warn on (likely bogus) wildcard usage in HTTP.  */
2006   if (has_wildcards_p (u->path))
2007     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2008
2009   xzero (hstat);
2010
2011   /* Determine the local filename.  */
2012   if (local_file && *local_file)
2013     hstat.local_file = local_file;
2014   else if (local_file && !opt.output_document)
2015     {
2016       *local_file = url_file_name (u);
2017       hstat.local_file = local_file;
2018     }
2019   else
2020     {
2021       dummy = url_file_name (u);
2022       hstat.local_file = &dummy;
2023       /* be honest about where we will save the file */
2024       if (local_file && opt.output_document)
2025         *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2026     }
2027
2028   if (!opt.output_document)
2029     locf = *hstat.local_file;
2030   else
2031     locf = opt.output_document;
2032
2033   hstat.referer = referer;
2034
2035   filename_len = strlen (*hstat.local_file);
2036   filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
2037
2038   if (opt.noclobber && file_exists_p (*hstat.local_file))
2039     {
2040       /* If opt.noclobber is turned on and file already exists, do not
2041          retrieve the file */
2042       logprintf (LOG_VERBOSE, _("\
2043 File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
2044       /* If the file is there, we suppose it's retrieved OK.  */
2045       *dt |= RETROKF;
2046
2047       /* #### Bogusness alert.  */
2048       /* If its suffix is "html" or "htm" or similar, assume text/html.  */
2049       if (has_html_suffix_p (*hstat.local_file))
2050         *dt |= TEXTHTML;
2051
2052       xfree_null (dummy);
2053       return RETROK;
2054     }
2055
2056   use_ts = 0;
2057   if (opt.timestamping)
2058     {
2059       int local_dot_orig_file_exists = 0;
2060
2061       if (opt.backup_converted)
2062         /* If -K is specified, we'll act on the assumption that it was specified
2063            last time these files were downloaded as well, and instead of just
2064            comparing local file X against server file X, we'll compare local
2065            file X.orig (if extant, else X) against server file X.  If -K
2066            _wasn't_ specified last time, or the server contains files called
2067            *.orig, -N will be back to not operating correctly with -k. */
2068         {
2069           /* Would a single s[n]printf() call be faster?  --dan
2070
2071              Definitely not.  sprintf() is horribly slow.  It's a
2072              different question whether the difference between the two
2073              affects a program.  Usually I'd say "no", but at one
2074              point I profiled Wget, and found that a measurable and
2075              non-negligible amount of time was lost calling sprintf()
2076              in url.c.  Replacing sprintf with inline calls to
2077              strcpy() and number_to_string() made a difference.
2078              --hniksic */
2079           memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
2080           memcpy (filename_plus_orig_suffix + filename_len,
2081                   ".orig", sizeof (".orig"));
2082
2083           /* Try to stat() the .orig file. */
2084           if (stat (filename_plus_orig_suffix, &st) == 0)
2085             {
2086               local_dot_orig_file_exists = 1;
2087               local_filename = filename_plus_orig_suffix;
2088             }
2089         }
2090
2091       if (!local_dot_orig_file_exists)
2092         /* Couldn't stat() <file>.orig, so try to stat() <file>. */
2093         if (stat (*hstat.local_file, &st) == 0)
2094           local_filename = *hstat.local_file;
2095
2096       if (local_filename != NULL)
2097         /* There was a local file, so we'll check later to see if the version
2098            the server has is the same version we already have, allowing us to
2099            skip a download. */
2100         {
2101           use_ts = 1;
2102           tml = st.st_mtime;
2103 #ifdef WINDOWS
2104           /* Modification time granularity is 2 seconds for Windows, so
2105              increase local time by 1 second for later comparison. */
2106           tml++;
2107 #endif
2108           local_size = st.st_size;
2109           got_head = 0;
2110         }
2111     }
2112   /* Reset the counter.  */
2113   count = 0;
2114   *dt = 0;
2115   /* THE loop */
2116   do
2117     {
2118       /* Increment the pass counter.  */
2119       ++count;
2120       sleep_between_retrievals (count);
2121       /* Get the current time string.  */
2122       tms = time_str (NULL);
2123       /* Print fetch message, if opt.verbose.  */
2124       if (opt.verbose)
2125         {
2126           char *hurl = url_string (u, 1);
2127           char tmp[256];
2128           strcpy (tmp, "        ");
2129           if (count > 1)
2130             sprintf (tmp, _("(try:%2d)"), count);
2131           logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
2132                      tms, hurl, tmp, locf);
2133 #ifdef WINDOWS
2134           ws_changetitle (hurl);
2135 #endif
2136           xfree (hurl);
2137         }
2138
2139       /* Default document type is empty.  However, if spider mode is
2140          on or time-stamping is employed, HEAD_ONLY commands is
2141          encoded within *dt.  */
2142       if (opt.spider || (use_ts && !got_head))
2143         *dt |= HEAD_ONLY;
2144       else
2145         *dt &= ~HEAD_ONLY;
2146
2147       /* Decide whether or not to restart.  */
2148       if (opt.always_rest
2149           && stat (locf, &st) == 0
2150           && S_ISREG (st.st_mode))
2151         /* When -c is used, continue from on-disk size.  (Can't use
2152            hstat.len even if count>1 because we don't want a failed
2153            first attempt to clobber existing data.)  */
2154         hstat.restval = st.st_size;
2155       else if (count > 1)
2156         /* otherwise, continue where the previous try left off */
2157         hstat.restval = hstat.len;
2158       else
2159         hstat.restval = 0;
2160
2161       /* Decide whether to send the no-cache directive.  We send it in
2162          two cases:
2163            a) we're using a proxy, and we're past our first retrieval.
2164               Some proxies are notorious for caching incomplete data, so
2165               we require a fresh get.
2166            b) caching is explicitly inhibited. */
2167       if ((proxy && count > 1)  /* a */
2168           || !opt.allow_cache   /* b */
2169           )
2170         *dt |= SEND_NOCACHE;
2171       else
2172         *dt &= ~SEND_NOCACHE;
2173
2174       /* Try fetching the document, or at least its head.  */
2175       err = gethttp (u, &hstat, dt, proxy);
2176
2177       /* It's unfortunate that wget determines the local filename before finding
2178          out the Content-Type of the file.  Barring a major restructuring of the
2179          code, we need to re-set locf here, since gethttp() may have xrealloc()d
2180          *hstat.local_file to tack on ".html". */
2181       if (!opt.output_document)
2182         locf = *hstat.local_file;
2183
2184       /* Time?  */
2185       tms = time_str (NULL);
2186       /* Get the new location (with or without the redirection).  */
2187       if (hstat.newloc)
2188         *newloc = xstrdup (hstat.newloc);
2189       switch (err)
2190         {
2191         case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2192         case CONERROR: case READERR: case WRITEFAILED:
2193         case RANGEERR: case FOPEN_EXCL_ERR:
2194           /* Non-fatal errors continue executing the loop, which will
2195              bring them to "while" statement at the end, to judge
2196              whether the number of tries was exceeded.  */
2197           free_hstat (&hstat);
2198           printwhat (count, opt.ntry);
2199           if (err == FOPEN_EXCL_ERR)
2200             {
2201               /* Re-determine the file name. */
2202               if (local_file && *local_file)
2203                 {
2204                   xfree (*local_file);
2205                   *local_file = url_file_name (u);
2206                   hstat.local_file = local_file;
2207                 }
2208               else
2209                 {
2210                   xfree (dummy);
2211                   dummy = url_file_name (u);
2212                   hstat.local_file = &dummy;
2213                 }
2214               /* be honest about where we will save the file */
2215               if (local_file && opt.output_document)
2216                 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2217               if (!opt.output_document)
2218                 locf = *hstat.local_file;
2219               else
2220                 locf = opt.output_document;
2221             }
2222           continue;
2223         case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2224         case SSLINITFAILED: case CONTNOTSUPPORTED:
2225           /* Fatal errors just return from the function.  */
2226           free_hstat (&hstat);
2227           xfree_null (dummy);
2228           return err;
2229         case FWRITEERR: case FOPENERR:
2230           /* Another fatal error.  */
2231           logputs (LOG_VERBOSE, "\n");
2232           logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2233                      *hstat.local_file, strerror (errno));
2234           free_hstat (&hstat);
2235           xfree_null (dummy);
2236           return err;
2237         case CONSSLERR:
2238           /* Another fatal error.  */
2239           logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2240           free_hstat (&hstat);
2241           xfree_null (dummy);
2242           return err;
2243         case NEWLOCATION:
2244           /* Return the new location to the caller.  */
2245           if (!hstat.newloc)
2246             {
2247               logprintf (LOG_NOTQUIET,
2248                          _("ERROR: Redirection (%d) without location.\n"),
2249                          hstat.statcode);
2250               free_hstat (&hstat);
2251               xfree_null (dummy);
2252               return WRONGCODE;
2253             }
2254           free_hstat (&hstat);
2255           xfree_null (dummy);
2256           return NEWLOCATION;
2257         case RETRUNNEEDED:
2258           /* The file was already fully retrieved. */
2259           free_hstat (&hstat);
2260           xfree_null (dummy);
2261           return RETROK;
2262         case RETRFINISHED:
2263           /* Deal with you later.  */
2264           break;
2265         default:
2266           /* All possibilities should have been exhausted.  */
2267           abort ();
2268         }
2269       if (!(*dt & RETROKF))
2270         {
2271           if (!opt.verbose)
2272             {
2273               /* #### Ugly ugly ugly! */
2274               char *hurl = url_string (u, 1);
2275               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2276               xfree (hurl);
2277             }
2278           logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2279                      tms, hstat.statcode, escnonprint (hstat.error));
2280           logputs (LOG_VERBOSE, "\n");
2281           free_hstat (&hstat);
2282           xfree_null (dummy);
2283           return WRONGCODE;
2284         }
2285
2286       /* Did we get the time-stamp?  */
2287       if (!got_head)
2288         {
2289           if (opt.timestamping && !hstat.remote_time)
2290             {
2291               logputs (LOG_NOTQUIET, _("\
2292 Last-modified header missing -- time-stamps turned off.\n"));
2293             }
2294           else if (hstat.remote_time)
2295             {
2296               /* Convert the date-string into struct tm.  */
2297               tmr = http_atotm (hstat.remote_time);
2298               if (tmr == (time_t) (-1))
2299                 logputs (LOG_VERBOSE, _("\
2300 Last-modified header invalid -- time-stamp ignored.\n"));
2301             }
2302         }
2303
2304       /* The time-stamping section.  */
2305       if (use_ts)
2306         {
2307           got_head = 1;
2308           *dt &= ~HEAD_ONLY;
2309           use_ts = 0;           /* no more time-stamping */
2310           count = 0;            /* the retrieve count for HEAD is
2311                                    reset */
2312           if (hstat.remote_time && tmr != (time_t) (-1))
2313             {
2314               /* Now time-stamping can be used validly.  Time-stamping
2315                  means that if the sizes of the local and remote file
2316                  match, and local file is newer than the remote file,
2317                  it will not be retrieved.  Otherwise, the normal
2318                  download procedure is resumed.  */
2319               if (tml >= tmr &&
2320                   (hstat.contlen == -1 || local_size == hstat.contlen))
2321                 {
2322                   logprintf (LOG_VERBOSE, _("\
2323 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2324                              local_filename);
2325                   free_hstat (&hstat);
2326                   xfree_null (dummy);
2327                   return RETROK;
2328                 }
2329               else if (tml >= tmr)
2330                 logprintf (LOG_VERBOSE, _("\
2331 The sizes do not match (local %s) -- retrieving.\n"),
2332                            number_to_static_string (local_size));
2333               else
2334                 logputs (LOG_VERBOSE,
2335                          _("Remote file is newer, retrieving.\n"));
2336             }
2337           free_hstat (&hstat);
2338           continue;
2339         }
2340       if ((tmr != (time_t) (-1))
2341           && !opt.spider
2342           && ((hstat.len == hstat.contlen) ||
2343               ((hstat.res == 0) &&
2344                ((hstat.contlen == -1) ||
2345                 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2346         {
2347           /* #### This code repeats in http.c and ftp.c.  Move it to a
2348              function!  */
2349           const char *fl = NULL;
2350           if (opt.output_document)
2351             {
2352               if (output_stream_regular)
2353                 fl = opt.output_document;
2354             }
2355           else
2356             fl = *hstat.local_file;
2357           if (fl)
2358             touch (fl, tmr);
2359         }
2360       /* End of time-stamping section.  */
2361
2362       if (opt.spider)
2363         {
2364           logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
2365                      escnonprint (hstat.error));
2366           xfree_null (dummy);
2367           return RETROK;
2368         }
2369
2370       tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
2371
2372       if (hstat.len == hstat.contlen)
2373         {
2374           if (*dt & RETROKF)
2375             {
2376               logprintf (LOG_VERBOSE,
2377                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2378                          tms, tmrate, locf,
2379                          number_to_static_string (hstat.len),
2380                          number_to_static_string (hstat.contlen));
2381               logprintf (LOG_NONVERBOSE,
2382                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2383                          tms, u->url,
2384                          number_to_static_string (hstat.len),
2385                          number_to_static_string (hstat.contlen),
2386                          locf, count);
2387             }
2388           ++opt.numurls;
2389           total_downloaded_bytes += hstat.len;
2390
2391           /* Remember that we downloaded the file for later ".orig" code. */
2392           if (*dt & ADDED_HTML_EXTENSION)
2393             downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2394           else
2395             downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2396
2397           free_hstat (&hstat);
2398           xfree_null (dummy);
2399           return RETROK;
2400         }
2401       else if (hstat.res == 0) /* No read error */
2402         {
2403           if (hstat.contlen == -1)  /* We don't know how much we were supposed
2404                                        to get, so assume we succeeded. */
2405             {
2406               if (*dt & RETROKF)
2407                 {
2408                   logprintf (LOG_VERBOSE,
2409                              _("%s (%s) - `%s' saved [%s]\n\n"),
2410                              tms, tmrate, locf,
2411                              number_to_static_string (hstat.len));
2412                   logprintf (LOG_NONVERBOSE,
2413                              "%s URL:%s [%s] -> \"%s\" [%d]\n",
2414                              tms, u->url, number_to_static_string (hstat.len),
2415                              locf, count);
2416                 }
2417               ++opt.numurls;
2418               total_downloaded_bytes += hstat.len;
2419
2420               /* Remember that we downloaded the file for later ".orig" code. */
2421               if (*dt & ADDED_HTML_EXTENSION)
2422                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2423               else
2424                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2425
2426               free_hstat (&hstat);
2427               xfree_null (dummy);
2428               return RETROK;
2429             }
2430           else if (hstat.len < hstat.contlen) /* meaning we lost the
2431                                                  connection too soon */
2432             {
2433               logprintf (LOG_VERBOSE,
2434                          _("%s (%s) - Connection closed at byte %s. "),
2435                          tms, tmrate, number_to_static_string (hstat.len));
2436               printwhat (count, opt.ntry);
2437               free_hstat (&hstat);
2438               continue;
2439             }
2440           else if (!opt.kill_longer) /* meaning we got more than expected */
2441             {
2442               logprintf (LOG_VERBOSE,
2443                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2444                          tms, tmrate, locf,
2445                          number_to_static_string (hstat.len),
2446                          number_to_static_string (hstat.contlen));
2447               logprintf (LOG_NONVERBOSE,
2448                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2449                          tms, u->url,
2450                          number_to_static_string (hstat.len),
2451                          number_to_static_string (hstat.contlen),
2452                          locf, count);
2453               ++opt.numurls;
2454               total_downloaded_bytes += hstat.len;
2455
2456               /* Remember that we downloaded the file for later ".orig" code. */
2457               if (*dt & ADDED_HTML_EXTENSION)
2458                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2459               else
2460                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2461
2462               free_hstat (&hstat);
2463               xfree_null (dummy);
2464               return RETROK;
2465             }
2466           else                  /* the same, but not accepted */
2467             {
2468               logprintf (LOG_VERBOSE,
2469                          _("%s (%s) - Connection closed at byte %s/%s. "),
2470                          tms, tmrate,
2471                          number_to_static_string (hstat.len),
2472                          number_to_static_string (hstat.contlen));
2473               printwhat (count, opt.ntry);
2474               free_hstat (&hstat);
2475               continue;
2476             }
2477         }
2478       else                      /* now hstat.res can only be -1 */
2479         {
2480           if (hstat.contlen == -1)
2481             {
2482               logprintf (LOG_VERBOSE,
2483                          _("%s (%s) - Read error at byte %s (%s)."),
2484                          tms, tmrate, number_to_static_string (hstat.len),
2485                          strerror (errno));
2486               printwhat (count, opt.ntry);
2487               free_hstat (&hstat);
2488               continue;
2489             }
2490           else                  /* hstat.res == -1 and contlen is given */
2491             {
2492               logprintf (LOG_VERBOSE,
2493                          _("%s (%s) - Read error at byte %s/%s (%s). "),
2494                          tms, tmrate,
2495                          number_to_static_string (hstat.len),
2496                          number_to_static_string (hstat.contlen),
2497                          strerror (errno));
2498               printwhat (count, opt.ntry);
2499               free_hstat (&hstat);
2500               continue;
2501             }
2502         }
2503       /* not reached */
2504     }
2505   while (!opt.ntry || (count < opt.ntry));
2506   return TRYLIMEXC;
2507 }
2508 \f
/* Converts struct tm to time_t, assuming the data in tm is UTC rather
   than local timezone.

   mktime is similar but assumes struct tm, also known as the
   "broken-down" form of time, is in local time zone.  mktime_from_utc
   uses mktime to make the conversion understanding that an offset
   will be introduced by the local time assumption.

   mktime_from_utc then measures the introduced offset by applying
   gmtime to the initial result and applying mktime to the resulting
   "broken-down" form.  The difference between the two mktime results
   is the measured offset which is then subtracted from the initial
   mktime result to yield a calendar time which is the value returned.

   tm_isdst of the intermediate gmtime result is set to 0 to force
   mktime to introduce a consistent offset (the non DST offset) since
   tm and tm+o might be on opposite sides of a DST change.  The
   function does not touch tm_isdst of T itself; for the two offsets
   to agree the caller is expected to pass T with tm_isdst == 0 (the
   caller in this file zeroes the structure first).

   Some implementations of mktime return -1 for the nonexistent
   localtime hour at the beginning of DST.  In this event, use
   mktime(tm - 1hr) + 3600.

   Schematically
     mktime(tm)   --> t+o
     gmtime(t+o)  --> tm+o
     mktime(tm+o) --> t+2o
     (t+o) - ((t+2o) - (t+o)) = t

   T is normalized by mktime, and its tm_hour is decremented when the
   fallback above is taken.  Returns -1 when no conversion is possible,
   including when gmtime cannot represent the intermediate value.

   Note that glibc contains a function of the same purpose named
   `timegm' (reverse of gmtime).  But obviously, it is not universally
   available, and unfortunately it is not straightforwardly
   extractable for use here.  Perhaps configure should detect timegm
   and use it where available.

   Contributed by Roger Beeman <beeman@cisco.com>, with the help of
   Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
   Further improved by Roger with assistance from Edward J. Sabol
   based on input by Jamie Zawinski.  */

static time_t
mktime_from_utc (struct tm *t)
{
  time_t tl, tb;
  struct tm *tg;

  tl = mktime (t);
  if (tl == (time_t) -1)
    {
      /* Possibly the nonexistent hour at the start of DST: retry one
         hour earlier and add the hour back afterwards.  */
      t->tm_hour--;
      tl = mktime (t);
      if (tl == (time_t) -1)
        return -1; /* can't deal with output from strptime */
      tl += 3600;
    }

  tg = gmtime (&tl);
  if (!tg)
    return -1; /* gmtime may fail (return NULL) for unrepresentable values */
  tg->tm_isdst = 0;

  tb = mktime (tg);
  if (tb == (time_t) -1)
    {
      tg->tm_hour--;
      tb = mktime (tg);
      if (tb == (time_t) -1)
        return -1; /* can't deal with output from gmtime */
      tb += 3600;
    }

  /* tl is t+o and tb is t+2o, so tb - tl is the offset o.  */
  return (tl - (tb - tl));
}
2576
2577 /* Check whether the result of strptime() indicates success.
2578    strptime() returns the pointer to how far it got to in the string.
2579    The processing has been successful if the string is at `GMT' or
2580    `+X', or at the end of the string.
2581
2582    In extended regexp parlance, the function returns 1 if P matches
2583    "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (which strptime
2584    can return) is considered a failure and 0 is returned.  */
2585 static int
2586 check_end (const char *p)
2587 {
2588   if (!p)
2589     return 0;
2590   while (ISSPACE (*p))
2591     ++p;
2592   if (!*p
2593       || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2594       || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2595     return 1;
2596   else
2597     return 0;
2598 }
2599
/* Parse the textual time specification TIME_STRING into seconds
   since the Epoch.

   The accepted layouts are the three date formats RFC2616 lets HTTP
   servers emit -- RFC1123-date, RFC850-date and asctime-date -- plus
   the format used in the Set-Cookie header.  Timezone designators are
   not interpreted; the time is taken to be GMT.

   Returns the resulting time_t, or -1 when none of the formats
   matches.

   Parsing is delegated to strptime, tried with each format in turn.
   That makes the parser stricter than would be ideal: strptime
   tolerates shortened months, one-digit days and the like, but an
   ideal HTTP time parser would ignore week days entirely and
   concentrate on the many ways of writing years, months, days, hours,
   minutes and seconds (accepting ISO 8601 would be nice, too).

   Free and PD alternatives were investigated but none was usable:
   getdate was big and unwieldy and reportedly had copyright issues,
   and Dr. Marcus Hennecke's atotm() from phttpd, though excellent, is
   not assigned to the FSF.  Hence strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: the Solaris strptime man page claims that %n and %t match
     white space, but that is not universally available.  A plain ` '
     in the format is used to mean "skip all WS" instead, which works
     with every strptime implementation tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  int i = 0;

  while (i < countof (time_formats))
    {
      struct tm t;
      const char *rest;

      /* Some strptime versions use the existing contents of struct tm
         when recalculating the date, so start from an all-zero
         structure to keep stack garbage out of the result.  */
      xzero (t);

      /* Solaris strptime does not recognize English month names in
         non-English locales; that is worked around by not setting the
         LC_TIME category.  Temporarily switching to the C locale
         around strptime would also work, but is slow and messy.  GNU
         strptime accepts English month names alongside local ones.  */
      rest = strptime (time_string, time_formats[i], &t);
      if (check_end (rest))
        return mktime_from_utc (&t);

      ++i;
    }

  /* No format matched.  */
  return -1;
}
2669 \f
2670 /* Authorization support: We support three authorization schemes:
2671
2672    * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
2673
2674    * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
2675    consisting of answering to the server's challenge with the proper
2676    MD5 digests.
2677
2678    * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
2679    Stenberg for libcurl.  Like digest, NTLM is based on a
2680    challenge-response mechanism, but unlike digest, it is non-standard
2681    (authenticates TCP connections rather than requests), undocumented
2682    and Microsoft-specific.  */
2683
/* Build the value of the authorization header for the `Basic'
   scheme: the string "USER:PASSWD" is base64-encoded and the result
   is appended to the literal "Basic ".  The scratch buffers live on
   the stack (alloca); the returned string comes from concat_strings.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD" without the terminating \0.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  char *plain = (char *) alloca (plain_len + 1);
  char *encoded = (char *) alloca (BASE64_LENGTH (plain_len) + 1);

  sprintf (plain, "%s:%s", user, passwd);
  base64_encode (plain, plain_len, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
2702
/* Advance X, which must be a modifiable pointer lvalue, past any run
   of whitespace characters (as classified by ISSPACE).  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
2707
2708 #ifdef ENABLE_DIGEST
/* Parse HTTP `WWW-Authenticate:' header.  AU points to the beginning
   of a field in such a header.  If the field is the one specified by
   ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
   digest authorization code), extract its quoted value into the
   (char*) variable pointed to by RET, freeing any value previously
   stored there.

   Returns:
     < 0  the field is named ATTR_NAME but is malformed (missing `=',
          missing or unterminated quoted string);
     0    the field is not ATTR_NAME -- this includes fields whose name
          merely begins with ATTR_NAME, such as `nonce-count' for
          `nonce';
     > 0  number of bytes consumed, up to and including the closing
          quote.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  const char *ep;
  const char *cp = au;
  size_t name_len = strlen (attr_name);

  if (strncmp (cp, attr_name, name_len) != 0)
    return 0;
  cp += name_len;

  /* The name must end here.  If it continues with anything other than
     whitespace or `=', this is a different, longer-named attribute
     and not a malformed instance of ATTR_NAME.  */
  if (*cp && *cp != '=' && !ISSPACE (*cp))
    return 0;

  if (!*cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '=')
    return -1;
  if (!*++cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '\"')
    return -1;
  if (!*++cp)
    return -1;

  /* Find the closing quote; CP is left at the start of the value.  */
  for (ep = cp; *ep && *ep != '\"'; ep++)
    ;
  if (!*ep)
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (cp, ep);
  return ep - au + 1;
}
2747
2748 /* Dump the hexadecimal representation of HASH to BUF.  HASH should be
2749    an array of 16 bytes containing the hash keys, and BUF should be a
2750    buffer of 33 writable characters (32 for hex digits plus one for
2751    zero termination).  */
2752 static void
2753 dump_hash (unsigned char *buf, const unsigned char *hash)
2754 {
2755   int i;
2756
2757   for (i = 0; i < MD5_HASHLEN; i++, hash++)
2758     {
2759       *buf++ = XNUM_TO_digit (*hash >> 4);
2760       *buf++ = XNUM_TO_digit (*hash & 0xf);
2761     }
2762   *buf = '\0';
2763 }
2764
2765 /* Take the line apart to find the challenge, and compose a digest
2766    authorization header.  See RFC2069 section 2.1.2.  */
2767 static char *
2768 digest_authentication_encode (const char *au, const char *user,
2769                               const char *passwd, const char *method,
2770                               const char *path)
2771 {
2772   static char *realm, *opaque, *nonce;
2773   static struct {
2774     const char *name;
2775     char **variable;
2776   } options[] = {
2777     { "realm", &realm },
2778     { "opaque", &opaque },
2779     { "nonce", &nonce }
2780   };
2781   char *res;
2782
2783   realm = opaque = nonce = NULL;
2784
2785   au += 6;                      /* skip over `Digest' */
2786   while (*au)
2787     {
2788       int i;
2789
2790       SKIP_WS (au);
2791       for (i = 0; i < countof (options); i++)
2792         {
2793           int skip = extract_header_attr (au, options[i].name,
2794                                           options[i].variable);
2795           if (skip < 0)
2796             {
2797               xfree_null (realm);
2798               xfree_null (opaque);
2799               xfree_null (nonce);
2800               return NULL;
2801             }
2802           else if (skip)
2803             {
2804               au += skip;
2805               break;
2806             }
2807         }
2808       if (i == countof (options))
2809         {
2810           while (*au && *au != '=')
2811             au++;
2812           if (*au && *++au)
2813             {
2814               SKIP_WS (au);
2815               if (*au == '\"')
2816                 {
2817                   au++;
2818                   while (*au && *au != '\"')
2819                     au++;
2820                   if (*au)
2821                     au++;
2822                 }
2823             }
2824         }
2825       while (*au && *au != ',')
2826         au++;
2827       if (*au)
2828         au++;
2829     }
2830   if (!realm || !nonce || !user || !passwd || !path || !method)
2831     {
2832       xfree_null (realm);
2833       xfree_null (opaque);
2834       xfree_null (nonce);
2835       return NULL;
2836     }
2837
2838   /* Calculate the digest value.  */
2839   {
2840     ALLOCA_MD5_CONTEXT (ctx);
2841     unsigned char hash[MD5_HASHLEN];
2842     unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2843     unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2844
2845     /* A1BUF = H(user ":" realm ":" password) */
2846     gen_md5_init (ctx);
2847     gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2848     gen_md5_update ((unsigned char *)":", 1, ctx);
2849     gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2850     gen_md5_update ((unsigned char *)":", 1, ctx);
2851     gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2852     gen_md5_finish (ctx, hash);
2853     dump_hash (a1buf, hash);
2854
2855     /* A2BUF = H(method ":" path) */
2856     gen_md5_init (ctx);
2857     gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2858     gen_md5_update ((unsigned char *)":", 1, ctx);
2859     gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2860     gen_md5_finish (ctx, hash);
2861     dump_hash (a2buf, hash);
2862
2863     /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2864     gen_md5_init (ctx);
2865     gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2866     gen_md5_update ((unsigned char *)":", 1, ctx);
2867     gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2868     gen_md5_update ((unsigned char *)":", 1, ctx);
2869     gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2870     gen_md5_finish (ctx, hash);
2871     dump_hash (response_digest, hash);
2872
2873     res = xmalloc (strlen (user)
2874                    + strlen (user)
2875                    + strlen (realm)
2876                    + strlen (nonce)
2877                    + strlen (path)
2878                    + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2879                    + (opaque ? strlen (opaque) : 0)
2880                    + 128);
2881     sprintf (res, "Digest \
2882 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2883              user, realm, nonce, path, response_digest);
2884     if (opaque)
2885       {
2886         char *p = res + strlen (res);
2887         strcat (p, ", opaque=\"");
2888         strcat (p, opaque);
2889         strcat (p, "\"");
2890       }
2891   }
2892   return res;
2893 }
2894 #endif /* ENABLE_DIGEST */
2895
/* Length of a string literal, not counting the terminating \0 that
   sizeof includes.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* True if the characters in [b, e) start with the string literal
   given as first argument and that prefix is followed either by the
   end of the range or by whitespace.  The comparison ignores case.  */
#define STARTS(literal, b, e)                           \
  ((e) - (b) >= STRSIZE (literal)                       \
   && strncasecmp (b, literal, STRSIZE (literal)) == 0  \
   && ((e) - (b) == STRSIZE (literal)                   \
       || ISSPACE (b[STRSIZE (literal)])))

/* Return non-zero if the header value in [HDRBEG, HDREND) names an
   authentication scheme this build can answer: `Basic' always,
   `Digest' and `NTLM' when the corresponding support is compiled
   in.  */
static int
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return 1;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return 1;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return 1;
#endif
  return 0;
}

#undef STARTS
2923
/* Produce the contents of the HTTP authorization request header in
   response to the `WWW-Authenticate' challenge AU.  The scheme named
   at the start of AU selects the encoder: `Basic', and -- when
   compiled in -- `Digest' and `NTLM'.

   *FINISHED is set to 1 by the Basic and Digest branches and when
   ntlm_input rejects the challenge; otherwise the NTLM branch leaves
   it to ntlm_output.  Returns the header value, or NULL when the
   chosen encoder could not produce one.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, int *finished)
{
  /* Callers pass only schemes accepted by
     known_authentication_scheme_p, so the first letter is enough to
     tell them apart.  */
  const int scheme = TOUPPER (*au);

  if (scheme == 'B')            /* Basic */
    {
      *finished = 1;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (scheme == 'D')            /* Digest */
    {
      *finished = 1;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (scheme == 'N')            /* NTLM */
    {
      if (ntlm_input (&pconn.ntlm, au))
        return ntlm_output (&pconn.ntlm, user, passwd, finished);
      *finished = 1;
      return NULL;
    }
#endif

  /* We shouldn't get here -- this function should be only called
     with values approved by known_authentication_scheme_p.  */
  abort ();
}
2961 \f
2962 void
2963 save_cookies (void)
2964 {
2965   if (wget_cookie_jar)
2966     cookie_jar_save (wget_cookie_jar, opt.cookies_output);
2967 }
2968
2969 void
2970 http_cleanup (void)
2971 {
2972   xfree_null (pconn.host);
2973   if (wget_cookie_jar)
2974     cookie_jar_delete (wget_cookie_jar);
2975 }