sjero.net Git - wget/blob - src/http.c

   1 /* HTTP support.
   2    Copyright (C) 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9  (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif
  38 #include <assert.h>
  39 #include <errno.h>
  40 #include <time.h>
  41
  42 #include "wget.h"
  43 #include "utils.h"
  44 #include "url.h"
  45 #include "host.h"
  46 #include "retr.h"
  47 #include "connect.h"
  48 #include "netrc.h"
  49 #ifdef HAVE_SSL
  50 # include "ssl.h"
  51 #endif
  52 #ifdef ENABLE_NTLM
  53 # include "http-ntlm.h"
  54 #endif
  55 #include "cookies.h"
  56 #ifdef ENABLE_DIGEST
  57 # include "gen-md5.h"
  58 #endif
  59 #include "convert.h"
  60
/* Objects defined in other translation units and only declared
   here.  */
extern char *version_string;
extern SUM_SIZE_INT total_downloaded_bytes;

extern FILE *output_stream;
extern bool output_stream_regular;

/* Smaller of X and Y.  Only defined here if nothing else has already
   provided MIN.  Each argument may be evaluated twice, so do not pass
   expressions with side effects.  */
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
  70
  71 \f
/* File-local cookie state.  NOTE(review): presumably a "cookies have
   been loaded" flag and the jar they were loaded into; the code that
   uses them is outside this part of the file -- confirm there.  */
static bool cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;

/* Content types spelled out as string constants.  */
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"

/* Some status code validation macros: */
/* True for any status in the range [200, 300).  */
#define H_20X(x)        (((x) >= 200) && ((x) < 300))
/* True only for 206 (partial contents).  */
#define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
/* True for 301, 302, 303 and 307.  Note that 300 (multiple choices)
   and 304 (not modified) are deliberately not in this list.  X is
   evaluated up to four times.  */
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY           \
                         || (x) == HTTP_STATUS_MOVED_TEMPORARILY        \
                         || (x) == HTTP_STATUS_SEE_OTHER                \
                         || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
  85
/* HTTP/1.0 status codes from RFC1945, provided for reference.  Codes
   that were only introduced by HTTP/1.1 are marked as such.  */
/* Successful 2xx.  */
#define HTTP_STATUS_OK                  200
#define HTTP_STATUS_CREATED             201
#define HTTP_STATUS_ACCEPTED            202
#define HTTP_STATUS_NO_CONTENT          204
#define HTTP_STATUS_PARTIAL_CONTENTS    206 /* from HTTP/1.1 */

/* Redirection 3xx.  */
#define HTTP_STATUS_MULTIPLE_CHOICES    300
#define HTTP_STATUS_MOVED_PERMANENTLY   301
#define HTTP_STATUS_MOVED_TEMPORARILY   302
#define HTTP_STATUS_SEE_OTHER           303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED        304
#define HTTP_STATUS_TEMPORARY_REDIRECT  307 /* from HTTP/1.1 */

/* Client error 4xx.  */
#define HTTP_STATUS_BAD_REQUEST         400
#define HTTP_STATUS_UNAUTHORIZED        401
#define HTTP_STATUS_FORBIDDEN           403
#define HTTP_STATUS_NOT_FOUND           404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416 /* from HTTP/1.1 */

/* Server errors 5xx.  */
#define HTTP_STATUS_INTERNAL            500
#define HTTP_STATUS_NOT_IMPLEMENTED     501
#define HTTP_STATUS_BAD_GATEWAY         502
#define HTTP_STATUS_UNAVAILABLE         503
 114 \f
/* Release policy of a request header: which of its two strings
   release_header frees.  rel_none frees neither, rel_name only the
   name, rel_value only the value, rel_both frees both.  */
enum rp {
  rel_none, rel_name, rel_value, rel_both
};

/* An outgoing HTTP request: the request line plus a growable array of
   headers.  */
struct request {
  /* Request method, e.g. "GET".  Never freed by this module.  */
  const char *method;
  /* Argument following the method on the request line.  Freed by
     request_free.  */
  char *arg;

  /* One "NAME: VALUE" header and the ownership of its strings.  */
  struct request_header {
    char *name, *value;
    enum rp release_policy;
  } *headers;
  /* HCOUNT is the number of headers in use; HCAPACITY is the number
     of elements HEADERS has room for.  */
  int hcount, hcapacity;
};
 129
 130 /* Create a new, empty request.  At least request_set_method must be
 131    called before the request can be used.  */
 132
 133 static struct request *
 134 request_new (void)
 135 {
 136   struct request *req = xnew0 (struct request);
 137   req->hcapacity = 8;
 138   req->headers = xnew_array (struct request_header, req->hcapacity);
 139   return req;
 140 }
 141
 142 /* Set the request's method and its arguments.  METH should be a
 143    literal string (or it should outlive the request) because it will
 144    not be freed.  ARG will be freed by request_free.  */
 145
 146 static void
 147 request_set_method (struct request *req, const char *meth, char *arg)
 148 {
 149   req->method = meth;
 150   req->arg = arg;
 151 }
 152
 153 /* Return the method string passed with the last call to
 154    request_set_method.  */
 155
 156 static const char *
 157 request_method (const struct request *req)
 158 {
 159   return req->method;
 160 }
 161
 162 /* Free one header according to the release policy specified with
 163    request_set_header.  */
 164
 165 static void
 166 release_header (struct request_header *hdr)
 167 {
 168   switch (hdr->release_policy)
 169     {
 170     case rel_none:
 171       break;
 172     case rel_name:
 173       xfree (hdr->name);
 174       break;
 175     case rel_value:
 176       xfree (hdr->value);
 177       break;
 178     case rel_both:
 179       xfree (hdr->name);
 180       xfree (hdr->value);
 181       break;
 182     }
 183 }
 184
 185 /* Set the request named NAME to VALUE.  Specifically, this means that
 186    a "NAME: VALUE\r\n" header line will be used in the request.  If a
 187    header with the same name previously existed in the request, its
 188    value will be replaced by this one.  A NULL value means do nothing.
 189
 190    RELEASE_POLICY determines whether NAME and VALUE should be released
 191    (freed) with request_free.  Allowed values are:
 192
 193     - rel_none     - don't free NAME or VALUE
 194     - rel_name     - free NAME when done
 195     - rel_value    - free VALUE when done
 196     - rel_both     - free both NAME and VALUE when done
 197
 198    Setting release policy is useful when arguments come from different
 199    sources.  For example:
 200
 201      // Don't free literal strings!
 202      request_set_header (req, "Pragma", "no-cache", rel_none);
 203
 204      // Don't free a global variable, we'll need it later.
 205      request_set_header (req, "Referer", opt.referer, rel_none);
 206
 207      // Value freshly allocated, free it when done.
 208      request_set_header (req, "Range",
 209                          aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
 210                          rel_value);
 211    */
 212
 213 static void
 214 request_set_header (struct request *req, char *name, char *value,
 215                     enum rp release_policy)
 216 {
 217   struct request_header *hdr;
 218   int i;
 219
 220   if (!value)
 221     {
 222       /* A NULL value is a no-op; if freeing the name is requested,
 223          free it now to avoid leaks.  */
 224       if (release_policy == rel_name || release_policy == rel_both)
 225         xfree (name);
 226       return;
 227     }
 228
 229   for (i = 0; i < req->hcount; i++)
 230     {
 231       hdr = &req->headers[i];
 232       if (0 == strcasecmp (name, hdr->name))
 233         {
 234           /* Replace existing header. */
 235           release_header (hdr);
 236           hdr->name = name;
 237           hdr->value = value;
 238           hdr->release_policy = release_policy;
 239           return;
 240         }
 241     }
 242
 243   /* Install new header. */
 244
 245   if (req->hcount >= req->hcapacity)
 246     {
 247       req->hcapacity <<= 1;
 248       req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
 249     }
 250   hdr = &req->headers[req->hcount++];
 251   hdr->name = name;
 252   hdr->value = value;
 253   hdr->release_policy = release_policy;
 254 }
 255
 256 /* Like request_set_header, but sets the whole header line, as
 257    provided by the user using the `--header' option.  For example,
 258    request_set_user_header (req, "Foo: bar") works just like
 259    request_set_header (req, "Foo", "bar").  */
 260
 261 static void
 262 request_set_user_header (struct request *req, const char *header)
 263 {
 264   char *name;
 265   const char *p = strchr (header, ':');
 266   if (!p)
 267     return;
 268   BOUNDED_TO_ALLOCA (header, p, name);
 269   ++p;
 270   while (ISSPACE (*p))
 271     ++p;
 272   request_set_header (req, xstrdup (name), (char *) p, rel_name);
 273 }
 274
 275 /* Remove the header with specified name from REQ.  Returns true if
 276    the header was actually removed, false otherwise.  */
 277
 278 static bool
 279 request_remove_header (struct request *req, char *name)
 280 {
 281   int i;
 282   for (i = 0; i < req->hcount; i++)
 283     {
 284       struct request_header *hdr = &req->headers[i];
 285       if (0 == strcasecmp (name, hdr->name))
 286         {
 287           release_header (hdr);
 288           /* Move the remaining headers by one. */
 289           if (i < req->hcount - 1)
 290             memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
 291           --req->hcount;
 292           return true;
 293         }
 294     }
 295   return false;
 296 }
 297
 298 #define APPEND(p, str) do {                     \
 299   int A_len = strlen (str);                     \
 300   memcpy (p, str, A_len);                       \
 301   p += A_len;                                   \
 302 } while (0)
 303
 304 /* Construct the request and write it to FD using fd_write.  */
 305
 306 static int
 307 request_send (const struct request *req, int fd)
 308 {
 309   char *request_string, *p;
 310   int i, size, write_error;
 311
 312   /* Count the request size. */
 313   size = 0;
 314
 315   /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
 316   size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
 317
 318   for (i = 0; i < req->hcount; i++)
 319     {
 320       struct request_header *hdr = &req->headers[i];
 321       /* NAME ": " VALUE "\r\n" */
 322       size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
 323     }
 324
 325   /* "\r\n\0" */
 326   size += 3;
 327
 328   p = request_string = alloca_array (char, size);
 329
 330   /* Generate the request. */
 331
 332   APPEND (p, req->method); *p++ = ' ';
 333   APPEND (p, req->arg);    *p++ = ' ';
 334   memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
 335
 336   for (i = 0; i < req->hcount; i++)
 337     {
 338       struct request_header *hdr = &req->headers[i];
 339       APPEND (p, hdr->name);
 340       *p++ = ':', *p++ = ' ';
 341       APPEND (p, hdr->value);
 342       *p++ = '\r', *p++ = '\n';
 343     }
 344
 345   *p++ = '\r', *p++ = '\n', *p++ = '\0';
 346   assert (p - request_string == size);
 347
 348 #undef APPEND
 349
 350   DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
 351
 352   /* Send the request to the server. */
 353
 354   write_error = fd_write (fd, request_string, size - 1, -1);
 355   if (write_error < 0)
 356     logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
 357                strerror (errno));
 358   return write_error;
 359 }
 360
 361 /* Release the resources used by REQ. */
 362
 363 static void
 364 request_free (struct request *req)
 365 {
 366   int i;
 367   xfree_null (req->arg);
 368   for (i = 0; i < req->hcount; i++)
 369     release_header (&req->headers[i]);
 370   xfree_null (req->headers);
 371   xfree (req);
 372 }
 373
 374 /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
 375    PROMISED_SIZE bytes are sent over the wire -- if the file is
 376    longer, read only that much; if the file is shorter, report an error.  */
 377
 378 static int
 379 post_file (int sock, const char *file_name, wgint promised_size)
 380 {
 381   static char chunk[8192];
 382   wgint written = 0;
 383   int write_error;
 384   FILE *fp;
 385
 386   DEBUGP (("[writing POST file %s ... ", file_name));
 387
 388   fp = fopen (file_name, "rb");
 389   if (!fp)
 390     return -1;
 391   while (!feof (fp) && written < promised_size)
 392     {
 393       int towrite;
 394       int length = fread (chunk, 1, sizeof (chunk), fp);
 395       if (length == 0)
 396         break;
 397       towrite = MIN (promised_size - written, length);
 398       write_error = fd_write (sock, chunk, towrite, -1);
 399       if (write_error < 0)
 400         {
 401           fclose (fp);
 402           return -1;
 403         }
 404       written += towrite;
 405     }
 406   fclose (fp);
 407
 408   /* If we've written less than was promised, report a (probably
 409      nonsensical) error rather than break the promise.  */
 410   if (written < promised_size)
 411     {
 412       errno = EINVAL;
 413       return -1;
 414     }
 415
 416   assert (written == promised_size);
 417   DEBUGP (("done]\n"));
 418   return 0;
 419 }
 420 \f
 421 static const char *
 422 response_head_terminator (const char *hunk, int oldlen, int peeklen)
 423 {
 424   const char *start, *end;
 425
 426   /* If at first peek, verify whether HUNK starts with "HTTP".  If
 427      not, this is a HTTP/0.9 request and we must bail out without
 428      reading anything.  */
 429   if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
 430     return hunk;
 431
 432   if (oldlen < 4)
 433     start = hunk;
 434   else
 435     start = hunk + oldlen - 4;
 436   end = hunk + oldlen + peeklen;
 437
 438   for (; start < end - 1; start++)
 439     if (*start == '\n')
 440       {
 441         if (start < end - 2
 442             && start[1] == '\r'
 443             && start[2] == '\n')
 444           return start + 3;
 445         if (start[1] == '\n')
 446           return start + 2;
 447       }
 448   return NULL;
 449 }
 450
 451 /* The maximum size of a single HTTP response we care to read.  This
 452    is not meant to impose an arbitrary limit, but to protect the user
 453    from Wget slurping up available memory upon encountering malicious
 454    or buggy server output.  Define it to 0 to remove the limit.  */
 455
 456 #define HTTP_RESPONSE_MAX_SIZE 65536
 457
 458 /* Read the HTTP request head from FD and return it.  The error
 459    conditions are the same as with fd_read_hunk.
 460
 461    To support HTTP/0.9 responses, this function tries to make sure
 462    that the data begins with "HTTP".  If this is not the case, no data
 463    is read and an empty request is returned, so that the remaining
 464    data can be treated as body.  */
 465
 466 static char *
 467 read_http_response_head (int fd)
 468 {
 469   return fd_read_hunk (fd, response_head_terminator, 512,
 470                        HTTP_RESPONSE_MAX_SIZE);
 471 }
 472
/* A parsed HTTP response head: the raw text plus an index of where
   each header line begins.  */
struct response {
  /* The response data.  resp_new stores the caller's pointer here
     without copying it, and resp_free does not free it.  */
  const char *data;

  /* The array of pointers that indicate where each header starts.
     For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     The headers are located like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
     ^                   ^                             ^          ^
     headers[0]          headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response (the
     status line), headers[1] points to the end of the status line
     and the beginning of the first header, etc.  The last real entry
     points at the empty line that ends the head, and the array is
     terminated by a NULL pointer.

     resp_new does not set this member at all for a headerless
     (HTTP/0.9) response, and the other resp_* functions test it for
     NULL to recognize that case.  */

  const char **headers;
};
 497
 498 /* Create a new response object from the text of the HTTP response,
 499    available in HEAD.  That text is automatically split into
 500    constituent header lines for fast retrieval using
 501    resp_header_*.  */
 502
 503 static struct response *
 504 resp_new (const char *head)
 505 {
 506   const char *hdr;
 507   int count, size;
 508
 509   struct response *resp = xnew0 (struct response);
 510   resp->data = head;
 511
 512   if (*head == '\0')
 513     {
 514       /* Empty head means that we're dealing with a headerless
 515          (HTTP/0.9) response.  In that case, don't set HEADERS at
 516          all.  */
 517       return resp;
 518     }
 519
 520   /* Split HEAD into header lines, so that resp_header_* functions
 521      don't need to do this over and over again.  */
 522
 523   size = count = 0;
 524   hdr = head;
 525   while (1)
 526     {
 527       DO_REALLOC (resp->headers, size, count + 1, const char *);
 528       resp->headers[count++] = hdr;
 529
 530       /* Break upon encountering an empty line. */
 531       if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
 532         break;
 533
 534       /* Find the end of HDR, including continuations. */
 535       do
 536         {
 537           const char *end = strchr (hdr, '\n');
 538           if (end)
 539             hdr = end + 1;
 540           else
 541             hdr += strlen (hdr);
 542         }
 543       while (*hdr == ' ' || *hdr == '\t');
 544     }
 545   DO_REALLOC (resp->headers, size, count + 1, const char *);
 546   resp->headers[count] = NULL;
 547
 548   return resp;
 549 }
 550
 551 /* Locate the header named NAME in the request data, starting with
 552    position START.  This allows the code to loop through the request
 553    data, filtering for all requests of a given name.  Returns the
 554    found position, or -1 for failure.  The code that uses this
 555    function typically looks like this:
 556
 557      for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
 558        ... do something with header ...
 559
 560    If you only care about one header, use resp_header_get instead of
 561    this function.  */
 562
 563 static int
 564 resp_header_locate (const struct response *resp, const char *name, int start,
 565                     const char **begptr, const char **endptr)
 566 {
 567   int i;
 568   const char **headers = resp->headers;
 569   int name_len;
 570
 571   if (!headers || !headers[1])
 572     return -1;
 573
 574   name_len = strlen (name);
 575   if (start > 0)
 576     i = start;
 577   else
 578     i = 1;
 579
 580   for (; headers[i + 1]; i++)
 581     {
 582       const char *b = headers[i];
 583       const char *e = headers[i + 1];
 584       if (e - b > name_len
 585           && b[name_len] == ':'
 586           && 0 == strncasecmp (b, name, name_len))
 587         {
 588           b += name_len + 1;
 589           while (b < e && ISSPACE (*b))
 590             ++b;
 591           while (b < e && ISSPACE (e[-1]))
 592             --e;
 593           *begptr = b;
 594           *endptr = e;
 595           return i;
 596         }
 597     }
 598   return -1;
 599 }
 600
 601 /* Find and retrieve the header named NAME in the request data.  If
 602    found, set *BEGPTR to its starting, and *ENDPTR to its ending
 603    position, and return true.  Otherwise return false.
 604
 605    This function is used as a building block for resp_header_copy
 606    and resp_header_strdup.  */
 607
 608 static bool
 609 resp_header_get (const struct response *resp, const char *name,
 610                  const char **begptr, const char **endptr)
 611 {
 612   int pos = resp_header_locate (resp, name, 0, begptr, endptr);
 613   return pos != -1;
 614 }
 615
 616 /* Copy the response header named NAME to buffer BUF, no longer than
 617    BUFSIZE (BUFSIZE includes the terminating 0).  If the header
 618    exists, true is returned, false otherwise.  If there should be no
 619    limit on the size of the header, use resp_header_strdup instead.
 620
 621    If BUFSIZE is 0, no data is copied, but the boolean indication of
 622    whether the header is present is still returned.  */
 623
 624 static bool
 625 resp_header_copy (const struct response *resp, const char *name,
 626                   char *buf, int bufsize)
 627 {
 628   const char *b, *e;
 629   if (!resp_header_get (resp, name, &b, &e))
 630     return false;
 631   if (bufsize)
 632     {
 633       int len = MIN (e - b, bufsize - 1);
 634       memcpy (buf, b, len);
 635       buf[len] = '\0';
 636     }
 637   return true;
 638 }
 639
 640 /* Return the value of header named NAME in RESP, allocated with
 641    malloc.  If such a header does not exist in RESP, return NULL.  */
 642
 643 static char *
 644 resp_header_strdup (const struct response *resp, const char *name)
 645 {
 646   const char *b, *e;
 647   if (!resp_header_get (resp, name, &b, &e))
 648     return NULL;
 649   return strdupdelim (b, e);
 650 }
 651
 652 /* Parse the HTTP status line, which is of format:
 653
 654    HTTP-Version SP Status-Code SP Reason-Phrase
 655
 656    The function returns the status-code, or -1 if the status line
 657    appears malformed.  The pointer to "reason-phrase" message is
 658    returned in *MESSAGE.  */
 659
 660 static int
 661 resp_status (const struct response *resp, char **message)
 662 {
 663   int status;
 664   const char *p, *end;
 665
 666   if (!resp->headers)
 667     {
 668       /* For a HTTP/0.9 response, assume status 200. */
 669       if (message)
 670         *message = xstrdup (_("No headers, assuming HTTP/0.9"));
 671       return 200;
 672     }
 673
 674   p = resp->headers[0];
 675   end = resp->headers[1];
 676
 677   if (!end)
 678     return -1;
 679
 680   /* "HTTP" */
 681   if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
 682     return -1;
 683   p += 4;
 684
 685   /* Match the HTTP version.  This is optional because Gnutella
 686      servers have been reported to not specify HTTP version.  */
 687   if (p < end && *p == '/')
 688     {
 689       ++p;
 690       while (p < end && ISDIGIT (*p))
 691         ++p;
 692       if (p < end && *p == '.')
 693         ++p;
 694       while (p < end && ISDIGIT (*p))
 695         ++p;
 696     }
 697
 698   while (p < end && ISSPACE (*p))
 699     ++p;
 700   if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
 701     return -1;
 702
 703   status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
 704   p += 3;
 705
 706   if (message)
 707     {
 708       while (p < end && ISSPACE (*p))
 709         ++p;
 710       while (p < end && ISSPACE (end[-1]))
 711         --end;
 712       *message = strdupdelim (p, end);
 713     }
 714
 715   return status;
 716 }
 717
 718 /* Release the resources used by RESP.  */
 719
 720 static void
 721 resp_free (struct response *resp)
 722 {
 723   xfree_null (resp->headers);
 724   xfree (resp);
 725 }
 726
 727 /* Print the server response, line by line, omitting the trailing CRLF
 728    from individual header lines, and prefixed with PREFIX.  */
 729
 730 static void
 731 print_server_response (const struct response *resp, const char *prefix)
 732 {
 733   int i;
 734   if (!resp->headers)
 735     return;
 736   for (i = 0; resp->headers[i + 1]; i++)
 737     {
 738       const char *b = resp->headers[i];
 739       const char *e = resp->headers[i + 1];
 740       /* Skip CRLF */
 741       if (b < e && e[-1] == '\n')
 742         --e;
 743       if (b < e && e[-1] == '\r')
 744         --e;
 745       /* This is safe even on printfs with broken handling of "%.<n>s"
 746          because resp->headers ends with \0.  */
 747       logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
 748     }
 749 }
 750
 751 /* Parse the `Content-Range' header and extract the information it
 752    contains.  Returns true if successful, false otherwise.  */
 753 static bool
 754 parse_content_range (const char *hdr, wgint *first_byte_ptr,
 755                      wgint *last_byte_ptr, wgint *entity_length_ptr)
 756 {
 757   wgint num;
 758
 759   /* Ancient versions of Netscape proxy server, presumably predating
 760      rfc2068, sent out `Content-Range' without the "bytes"
 761      specifier.  */
 762   if (0 == strncasecmp (hdr, "bytes", 5))
 763     {
 764       hdr += 5;
 765       /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
 766          HTTP spec. */
 767       if (*hdr == ':')
 768         ++hdr;
 769       while (ISSPACE (*hdr))
 770         ++hdr;
 771       if (!*hdr)
 772         return false;
 773     }
 774   if (!ISDIGIT (*hdr))
 775     return false;
 776   for (num = 0; ISDIGIT (*hdr); hdr++)
 777     num = 10 * num + (*hdr - '0');
 778   if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
 779     return false;
 780   *first_byte_ptr = num;
 781   ++hdr;
 782   for (num = 0; ISDIGIT (*hdr); hdr++)
 783     num = 10 * num + (*hdr - '0');
 784   if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
 785     return false;
 786   *last_byte_ptr = num;
 787   ++hdr;
 788   for (num = 0; ISDIGIT (*hdr); hdr++)
 789     num = 10 * num + (*hdr - '0');
 790   *entity_length_ptr = num;
 791   return true;
 792 }
 793
 794 /* Read the body of the request, but don't store it anywhere and don't
 795    display a progress gauge.  This is useful for reading the bodies of
 796    administrative responses to which we will soon issue another
 797    request.  The response is not useful to the user, but reading it
 798    allows us to continue using the same connection to the server.
 799
 800    If reading fails, false is returned, true otherwise.  In debug
 801    mode, the body is displayed for debugging purposes.  */
 802
 803 static bool
 804 skip_short_body (int fd, wgint contlen)
 805 {
 806   enum {
 807     SKIP_SIZE = 512,            /* size of the download buffer */
 808     SKIP_THRESHOLD = 4096       /* the largest size we read */
 809   };
 810   char dlbuf[SKIP_SIZE + 1];
 811   dlbuf[SKIP_SIZE] = '\0';      /* so DEBUGP can safely print it */
 812
 813   /* We shouldn't get here with unknown contlen.  (This will change
 814      with HTTP/1.1, which supports "chunked" transfer.)  */
 815   assert (contlen != -1);
 816
 817   /* If the body is too large, it makes more sense to simply close the
 818      connection than to try to read the body.  */
 819   if (contlen > SKIP_THRESHOLD)
 820     return false;
 821
 822   DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
 823
 824   while (contlen > 0)
 825     {
 826       int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
 827       if (ret <= 0)
 828         {
 829           /* Don't normally report the error since this is an
 830              optimization that should be invisible to the user.  */
 831           DEBUGP (("] aborting (%s).\n",
 832                    ret < 0 ? strerror (errno) : "EOF received"));
 833           return false;
 834         }
 835       contlen -= ret;
 836       /* Safe even if %.*s bogusly expects terminating \0 because
 837          we've zero-terminated dlbuf above.  */
 838       DEBUGP (("%.*s", ret, dlbuf));
 839     }
 840
 841   DEBUGP (("] done.\n"));
 842   return true;
 843 }
 844 \f
/* Persistent connections.  Currently, we cache the most recently used
   connection as persistent, provided that the HTTP server agrees to
   make it such.  The persistence data is stored in the variables
   below.  Ideally, it should be possible to cache an arbitrary fixed
   number of these connections.  */

/* Whether a persistent connection is active.  The contents of pconn
   are only meaningful while this is true.  */
static bool pconn_active;

static struct {
  /* The socket of the connection.  */
  int socket;

  /* Host and port of the currently active persistent connection.
     HOST is a private copy that is freed when the connection is
     invalidated.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  bool ssl;

  /* Whether the connection was authorized.  This is only done by
     NTLM, which authorizes *connections* rather than individual
     requests.  (That practice is peculiar for HTTP, but it is a
     useful optimization.)  */
  bool authorized;

#ifdef ENABLE_NTLM
  /* NTLM data of the current connection.  */
  struct ntlmdata ntlm;
#endif
} pconn;
 876
/* Mark the persistent connection as invalid and free the resources it
   uses: the socket is closed, the stored host name is freed and the
   whole pconn record is zeroed.  This is used by the CLOSE_* macros
   after they forcefully close a registered persistent connection.

   Must only be called while a persistent connection is registered,
   because the stored host name is freed unconditionally.  */

static void
invalidate_persistent (void)
{
  DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
  pconn_active = false;
  fd_close (pconn.socket);
  xfree (pconn.host);
  /* Wipe every field so that no stale data survives.  */
  xzero (pconn);
}
 890
 891 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
 892    persistent.  This will enable someone to use the same connection
 893    later.  In the context of HTTP, this must be called only AFTER the
 894    response has been received and the server has promised that the
 895    connection will remain alive.
 896
 897    If a previous connection was persistent, it is closed. */
 898
 899 static void
 900 register_persistent (const char *host, int port, int fd, bool ssl)
 901 {
 902   if (pconn_active)
 903     {
 904       if (pconn.socket == fd)
 905         {
 906           /* The connection FD is already registered. */
 907           return;
 908         }
 909       else
 910         {
 911           /* The old persistent connection is still active; close it
 912              first.  This situation arises whenever a persistent
 913              connection exists, but we then connect to a different
 914              host, and try to register a persistent connection to that
 915              one.  */
 916           invalidate_persistent ();
 917         }
 918     }
 919
 920   pconn_active = true;
 921   pconn.socket = fd;
 922   pconn.host = xstrdup (host);
 923   pconn.port = port;
 924   pconn.ssl = ssl;
 925   pconn.authorized = false;
 926
 927   DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
 928 }
 929
 930 /* Return true if a persistent connection is available for connecting
 931    to HOST:PORT.  */
 932
 933 static bool
 934 persistent_available_p (const char *host, int port, bool ssl,
 935                         bool *host_lookup_failed)
 936 {
 937   /* First, check whether a persistent connection is active at all.  */
 938   if (!pconn_active)
 939     return false;
 940
 941   /* If we want SSL and the last connection wasn't or vice versa,
 942      don't use it.  Checking for host and port is not enough because
 943      HTTP and HTTPS can apparently coexist on the same port.  */
 944   if (ssl != pconn.ssl)
 945     return false;
 946
 947   /* If we're not connecting to the same port, we're not interested. */
 948   if (port != pconn.port)
 949     return false;
 950
 951   /* If the host is the same, we're in business.  If not, there is
 952      still hope -- read below.  */
 953   if (0 != strcasecmp (host, pconn.host))
 954     {
 955       /* Check if pconn.socket is talking to HOST under another name.
 956          This happens often when both sites are virtual hosts
 957          distinguished only by name and served by the same network
 958          interface, and hence the same web server (possibly set up by
 959          the ISP and serving many different web sites).  This
 960          admittedly unconventional optimization does not contradict
 961          HTTP and works well with popular server software.  */
 962
 963       bool found;
 964       ip_address ip;
 965       struct address_list *al;
 966
 967       if (ssl)
 968         /* Don't try to talk to two different SSL sites over the same
 969            secure connection!  (Besides, it's not clear that
 970            name-based virtual hosting is even possible with SSL.)  */
 971         return false;
 972
 973       /* If pconn.socket's peer is one of the IP addresses HOST
 974          resolves to, pconn.socket is for all intents and purposes
 975          already talking to HOST.  */
 976
 977       if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
 978         {
 979           /* Can't get the peer's address -- something must be very
 980              wrong with the connection.  */
 981           invalidate_persistent ();
 982           return false;
 983         }
 984       al = lookup_host (host, 0);
 985       if (!al)
 986         {
 987           *host_lookup_failed = true;
 988           return false;
 989         }
 990
 991       found = address_list_contains (al, &ip);
 992       address_list_release (al);
 993
 994       if (!found)
 995         return false;
 996
 997       /* The persistent connection's peer address was found among the
 998          addresses HOST resolved to; therefore, pconn.sock is in fact
 999          already talking to HOST -- no need to reconnect.  */
1000     }
1001
1002   /* Finally, check whether the connection is still open.  This is
1003      important because most server implement a liberal (short) timeout
1004      on persistent connections.  Wget can of course always reconnect
1005      if the connection doesn't work out, but it's nicer to know in
1006      advance.  This test is a logical followup of the first test, but
1007      is "expensive" and therefore placed at the end of the list.  */
1008
1009   if (!test_socket_open (pconn.socket))
1010     {
1011       /* Oops, the socket is no longer open.  Now that we know that,
1012          let's invalidate the persistent connection before returning
1013          0.  */
1014       invalidate_persistent ();
1015       return false;
1016     }
1017
1018   return true;
1019 }
1020
/* The two CLOSE macros distinguish between two cases: CLOSE_FINISH is
   for when the job we've been doing is finished and we want to close
   the connection and leave; CLOSE_INVALIDATE is for when something is
   seriously wrong and we're closing the connection as part of
   cleanup.

   In case of keep_alive, CLOSE_FINISH leaves the connection open (and
   FD untouched), while CLOSE_INVALIDATE still closes it.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".

   Both macros expect `keep_alive' (CLOSE_FINISH only), `pconn_active'
   and `pconn' to be in scope at the point of use.  FD must be a
   modifiable lvalue: whenever the connection is actually shut down,
   FD is reset to -1 so that no stale descriptor value is left behind,
   regardless of whether the shutdown went through
   invalidate_persistent or through fd_close.  */

#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      if (pconn_active && (fd) == pconn.socket) \
        invalidate_persistent ();               \
      else                                      \
        fd_close (fd);                          \
      fd = -1;                                  \
    }                                           \
} while (0)
1048
/* Unconditionally shut down FD and reset it to -1.  A descriptor that
   is not the registered persistent socket is simply closed; the
   registered one is torn down through invalidate_persistent.  */
#define CLOSE_INVALIDATE(fd) do {               \
  if (!pconn_active || (fd) != pconn.socket)    \
    fd_close (fd);                              \
  else                                          \
    invalidate_persistent ();                   \
  fd = -1;                                      \
} while (0)
1056 \f
1057 struct http_stat
1058 {
1059   wgint len;                    /* received length */
1060   wgint contlen;                /* expected length */
1061   wgint restval;                /* the restart value */
1062   int res;                      /* the result of last read */
1063   char *newloc;                 /* new location (redirection) */
1064   char *remote_time;            /* remote time-stamp string */
1065   char *error;                  /* textual HTTP error */
1066   int statcode;                 /* status code */
1067   wgint rd_size;                /* amount of data read from socket */
1068   double dltime;                /* time it took to download the data */
1069   const char *referer;          /* value of the referer header. */
1070   char **local_file;            /* local file. */
1071 };
1072
1073 static void
1074 free_hstat (struct http_stat *hs)
1075 {
1076   xfree_null (hs->newloc);
1077   xfree_null (hs->remote_time);
1078   xfree_null (hs->error);
1079
1080   /* Guard against being called twice. */
1081   hs->newloc = NULL;
1082   hs->remote_time = NULL;
1083   hs->error = NULL;
1084 }
1085
/* Forward declarations of helpers used before their definitions.
   Parameter names follow the way gethttp calls them; presumably they
   match the definitions -- verify against those.  */
static char *create_authorization_line (const char *au,
                                        const char *user,
                                        const char *passwd,
                                        const char *method,
                                        const char *path,
                                        bool *finished);
static char *basic_authentication_encode (const char *user,
                                          const char *passwd);
static bool known_authentication_scheme_p (const char *hdrbeg,
                                           const char *hdrend);

time_t http_atotm (const char *time_string);
1093
/* Evaluate to non-zero if LINE starts, case-insensitively, with
   STRING_CONSTANT as a whole word, i.e. the prefix is followed either
   by whitespace or by the end of the string.

   STRING_CONSTANT must be a string literal (or a char array), because
   its length is taken with sizeof.  LINE may be any pointer-valued
   expression; it is parenthesized in the expansion so that arguments
   such as `p + 1' index correctly.  LINE is evaluated more than once,
   so it must not have side effects.  */
#define BEGINS_WITH(line, string_constant)                                \
  (!strncasecmp ((line), (string_constant), sizeof (string_constant) - 1) \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                     \
       || !(line)[sizeof (string_constant) - 1]))
1098
/* Add the User-Agent header to REQ according to opt.useragent:
   - not set: send the default "Wget/<version_string>", formatted with
     aprintf and handed over with rel_value;
   - set to the empty string: send no User-Agent header at all;
   - set to anything else: send that string as-is, with rel_none.  */
#define SET_USER_AGENT(req) do {                                         \
  if (opt.useragent)                                                     \
    {                                                                    \
      if (*opt.useragent)                                                \
        request_set_header (req, "User-Agent", opt.useragent, rel_none); \
    }                                                                    \
  else                                                                   \
    request_set_header (req, "User-Agent",                               \
                        aprintf ("Wget/%s", version_string), rel_value); \
} while (0)
1106
/* True when any of the options that allow clobbering the file
   (i.e. opening it with "wb") is in effect.  Collected in one place
   to avoid repeating the list later.  #### This will require
   rework.  */
#define ALLOW_CLOBBER           \
  (opt.noclobber                \
   || opt.always_rest           \
   || opt.timestamping          \
   || opt.dirstruct             \
   || opt.output_document)
1112
1113 /* Retrieve a document through HTTP protocol.  It recognizes status
1114    code, and correctly handles redirections.  It closes the network
1115    socket.  If it receives an error from the functions below it, it
1116    will print it if there is enough information to do so (almost
1117    always), returning the error to the caller (i.e. http_loop).
1118
1119    Various HTTP parameters are stored to hs.
1120
1121    If PROXY is non-NULL, the connection will be made to the proxy
1122    server, and u->url will be requested.  */
1123 static uerr_t
1124 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1125 {
1126   struct request *req;
1127
1128   char *type;
1129   char *user, *passwd;
1130   char *proxyauth;
1131   int statcode;
1132   int write_error;
1133   wgint contlen, contrange;
1134   struct url *conn;
1135   FILE *fp;
1136
1137   int sock = -1;
1138   int flags;
1139
1140   /* Set to 1 when the authorization has failed permanently and should
1141      not be tried again. */
1142   bool auth_finished = false;
1143
1144   /* Whether NTLM authentication is used for this request. */
1145   bool ntlm_seen = false;
1146
1147   /* Whether our connection to the remote host is through SSL.  */
1148   bool using_ssl = false;
1149
1150   /* Whether a HEAD request will be issued (as opposed to GET or
1151      POST). */
1152   bool head_only = !!(*dt & HEAD_ONLY);
1153
1154   char *head;
1155   struct response *resp;
1156   char hdrval[256];
1157   char *message;
1158
1159   /* Whether this connection will be kept alive after the HTTP request
1160      is done. */
1161   bool keep_alive;
1162
1163   /* Whether keep-alive should be inhibited.
1164
1165      RFC 2068 requests that 1.0 clients not send keep-alive requests
1166      to proxies.  This is because many 1.0 proxies do not interpret
1167      the Connection header and transfer it to the remote server,
1168      causing it to not close the connection and leave both the proxy
1169      and the client hanging.  */
1170   bool inhibit_keep_alive =
1171     !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1172
1173   /* Headers sent when using POST. */
1174   wgint post_data_size = 0;
1175
1176   bool host_lookup_failed = false;
1177
1178 #ifdef HAVE_SSL
1179   if (u->scheme == SCHEME_HTTPS)
1180     {
1181       /* Initialize the SSL context.  After this has once been done,
1182          it becomes a no-op.  */
1183       if (!ssl_init ())
1184         {
1185           scheme_disable (SCHEME_HTTPS);
1186           logprintf (LOG_NOTQUIET,
1187                      _("Disabling SSL due to encountered errors.\n"));
1188           return SSLINITFAILED;
1189         }
1190     }
1191 #endif /* HAVE_SSL */
1192
1193   if (!head_only)
1194     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1195        know the local filename so we can save to it. */
1196     assert (*hs->local_file != NULL);
1197
1198   /* Initialize certain elements of struct http_stat.  */
1199   hs->len = 0;
1200   hs->contlen = -1;
1201   hs->res = -1;
1202   hs->newloc = NULL;
1203   hs->remote_time = NULL;
1204   hs->error = NULL;
1205
1206   conn = u;
1207
1208   /* Prepare the request to send. */
1209
1210   req = request_new ();
1211   {
1212     char *meth_arg;
1213     const char *meth = "GET";
1214     if (head_only)
1215       meth = "HEAD";
1216     else if (opt.post_file_name || opt.post_data)
1217       meth = "POST";
1218     /* Use the full path, i.e. one that includes the leading slash and
1219        the query string.  E.g. if u->path is "foo/bar" and u->query is
1220        "param=value", full_path will be "/foo/bar?param=value".  */
1221     if (proxy
1222 #ifdef HAVE_SSL
1223         /* When using SSL over proxy, CONNECT establishes a direct
1224            connection to the HTTPS server.  Therefore use the same
1225            argument as when talking to the server directly. */
1226         && u->scheme != SCHEME_HTTPS
1227 #endif
1228         )
1229       meth_arg = xstrdup (u->url);
1230     else
1231       meth_arg = url_full_path (u);
1232     request_set_method (req, meth, meth_arg);
1233   }
1234
1235   request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1236   if (*dt & SEND_NOCACHE)
1237     request_set_header (req, "Pragma", "no-cache", rel_none);
1238   if (hs->restval)
1239     request_set_header (req, "Range",
1240                         aprintf ("bytes=%s-",
1241                                  number_to_static_string (hs->restval)),
1242                         rel_value);
1243   SET_USER_AGENT (req);
1244   request_set_header (req, "Accept", "*/*", rel_none);
1245
1246   /* Find the username and password for authentication. */
1247   user = u->user;
1248   passwd = u->passwd;
1249   search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1250   user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1251   passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1252
1253   if (user && passwd)
1254     {
1255       /* We have the username and the password, but haven't tried
1256          any authorization yet.  Let's see if the "Basic" method
1257          works.  If not, we'll come back here and construct a
1258          proper authorization method with the right challenges.
1259
1260          If we didn't employ this kind of logic, every URL that
1261          requires authorization would have to be processed twice,
1262          which is very suboptimal and generates a bunch of false
1263          "unauthorized" errors in the server log.
1264
1265          #### But this logic also has a serious problem when used
1266          with stronger authentications: we *first* transmit the
1267          username and the password in clear text, and *then* attempt a
1268          stronger authentication scheme.  That cannot be right!  We
1269          are only fortunate that almost everyone still uses the
1270          `Basic' scheme anyway.
1271
1272          There should be an option to prevent this from happening, for
1273          those who use strong authentication schemes and value their
1274          passwords.  */
1275       request_set_header (req, "Authorization",
1276                           basic_authentication_encode (user, passwd),
1277                           rel_value);
1278     }
1279
1280   proxyauth = NULL;
1281   if (proxy)
1282     {
1283       char *proxy_user, *proxy_passwd;
1284       /* For normal username and password, URL components override
1285          command-line/wgetrc parameters.  With proxy
1286          authentication, it's the reverse, because proxy URLs are
1287          normally the "permanent" ones, so command-line args
1288          should take precedence.  */
1289       if (opt.proxy_user && opt.proxy_passwd)
1290         {
1291           proxy_user = opt.proxy_user;
1292           proxy_passwd = opt.proxy_passwd;
1293         }
1294       else
1295         {
1296           proxy_user = proxy->user;
1297           proxy_passwd = proxy->passwd;
1298         }
1299       /* #### This does not appear right.  Can't the proxy request,
1300          say, `Digest' authentication?  */
1301       if (proxy_user && proxy_passwd)
1302         proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1303
1304       /* If we're using a proxy, we will be connecting to the proxy
1305          server.  */
1306       conn = proxy;
1307
1308       /* Proxy authorization over SSL is handled below. */
1309 #ifdef HAVE_SSL
1310       if (u->scheme != SCHEME_HTTPS)
1311 #endif
1312         request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1313     }
1314
1315   {
1316     /* Whether we need to print the host header with braces around
1317        host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1318        usual "Host: symbolic-name:1234". */
1319     bool squares = strchr (u->host, ':') != NULL;
1320     if (u->port == scheme_default_port (u->scheme))
1321       request_set_header (req, "Host",
1322                           aprintf (squares ? "[%s]" : "%s", u->host),
1323                           rel_value);
1324     else
1325       request_set_header (req, "Host",
1326                           aprintf (squares ? "[%s]:%d" : "%s:%d",
1327                                    u->host, u->port),
1328                           rel_value);
1329   }
1330
1331   if (!inhibit_keep_alive)
1332     request_set_header (req, "Connection", "Keep-Alive", rel_none);
1333
1334   if (opt.cookies)
1335     request_set_header (req, "Cookie",
1336                         cookie_header (wget_cookie_jar,
1337                                        u->host, u->port, u->path,
1338 #ifdef HAVE_SSL
1339                                        u->scheme == SCHEME_HTTPS
1340 #else
1341                                        0
1342 #endif
1343                                        ),
1344                         rel_value);
1345
1346   if (opt.post_data || opt.post_file_name)
1347     {
1348       request_set_header (req, "Content-Type",
1349                           "application/x-www-form-urlencoded", rel_none);
1350       if (opt.post_data)
1351         post_data_size = strlen (opt.post_data);
1352       else
1353         {
1354           post_data_size = file_size (opt.post_file_name);
1355           if (post_data_size == -1)
1356             {
1357               logprintf (LOG_NOTQUIET, _("POST data file missing: %s (%s)\n"),
1358                          opt.post_file_name, strerror (errno));
1359               post_data_size = 0;
1360             }
1361         }
1362       request_set_header (req, "Content-Length",
1363                           xstrdup (number_to_static_string (post_data_size)),
1364                           rel_value);
1365     }
1366
1367   /* Add the user headers. */
1368   if (opt.user_headers)
1369     {
1370       int i;
1371       for (i = 0; opt.user_headers[i]; i++)
1372         request_set_user_header (req, opt.user_headers[i]);
1373     }
1374
1375  retry_with_auth:
1376   /* We need to come back here when the initial attempt to retrieve
1377      without authorization header fails.  (Expected to happen at least
1378      for the Digest authorization scheme.)  */
1379
1380   keep_alive = false;
1381
1382   /* Establish the connection.  */
1383
1384   if (!inhibit_keep_alive)
1385     {
1386       /* Look for a persistent connection to target host, unless a
1387          proxy is used.  The exception is when SSL is in use, in which
1388          case the proxy is nothing but a passthrough to the target
1389          host, registered as a connection to the latter.  */
1390       struct url *relevant = conn;
1391 #ifdef HAVE_SSL
1392       if (u->scheme == SCHEME_HTTPS)
1393         relevant = u;
1394 #endif
1395
1396       if (persistent_available_p (relevant->host, relevant->port,
1397 #ifdef HAVE_SSL
1398                                   relevant->scheme == SCHEME_HTTPS,
1399 #else
1400                                   0,
1401 #endif
1402                                   &host_lookup_failed))
1403         {
1404           sock = pconn.socket;
1405           using_ssl = pconn.ssl;
1406           logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1407                      escnonprint (pconn.host), pconn.port);
1408           DEBUGP (("Reusing fd %d.\n", sock));
1409           if (pconn.authorized)
1410             /* If the connection is already authorized, the "Basic"
1411                authorization added by code above is unnecessary and
1412                only hurts us.  */
1413             request_remove_header (req, "Authorization");
1414         }
1415     }
1416
1417   if (sock < 0)
1418     {
1419       /* In its current implementation, persistent_available_p will
1420          look up conn->host in some cases.  If that lookup failed, we
1421          don't need to bother with connect_to_host.  */
1422       if (host_lookup_failed)
1423         {
1424           request_free (req);
1425           return HOSTERR;
1426         }
1427
1428       sock = connect_to_host (conn->host, conn->port);
1429       if (sock == E_HOST)
1430         {
1431           request_free (req);
1432           return HOSTERR;
1433         }
1434       else if (sock < 0)
1435         {
1436           request_free (req);
1437           return (retryable_socket_connect_error (errno)
1438                   ? CONERROR : CONIMPOSSIBLE);
1439         }
1440
1441 #ifdef HAVE_SSL
1442       if (proxy && u->scheme == SCHEME_HTTPS)
1443         {
1444           /* When requesting SSL URLs through proxies, use the
1445              CONNECT method to request passthrough.  */
1446           struct request *connreq = request_new ();
1447           request_set_method (connreq, "CONNECT",
1448                               aprintf ("%s:%d", u->host, u->port));
1449           SET_USER_AGENT (connreq);
1450           if (proxyauth)
1451             {
1452               request_set_header (connreq, "Proxy-Authorization",
1453                                   proxyauth, rel_value);
1454               /* Now that PROXYAUTH is part of the CONNECT request,
1455                  zero it out so we don't send proxy authorization with
1456                  the regular request below.  */
1457               proxyauth = NULL;
1458             }
1459           /* Examples in rfc2817 use the Host header in CONNECT
1460              requests.  I don't see how that gains anything, given
1461              that the contents of Host would be exactly the same as
1462              the contents of CONNECT.  */
1463
1464           write_error = request_send (connreq, sock);
1465           request_free (connreq);
1466           if (write_error < 0)
1467             {
1468               logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1469                          strerror (errno));
1470               CLOSE_INVALIDATE (sock);
1471               return WRITEFAILED;
1472             }
1473
1474           head = read_http_response_head (sock);
1475           if (!head)
1476             {
1477               logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1478                          strerror (errno));
1479               CLOSE_INVALIDATE (sock);
1480               return HERR;
1481             }
1482           message = NULL;
1483           if (!*head)
1484             {
1485               xfree (head);
1486               goto failed_tunnel;
1487             }
1488           DEBUGP (("proxy responded with: [%s]\n", head));
1489
1490           resp = resp_new (head);
1491           statcode = resp_status (resp, &message);
1492           resp_free (resp);
1493           xfree (head);
1494           if (statcode != 200)
1495             {
1496             failed_tunnel:
1497               logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1498                          message ? escnonprint (message) : "?");
1499               xfree_null (message);
1500               return CONSSLERR;
1501             }
1502           xfree_null (message);
1503
1504           /* SOCK is now *really* connected to u->host, so update CONN
1505              to reflect this.  That way register_persistent will
1506              register SOCK as being connected to u->host:u->port.  */
1507           conn = u;
1508         }
1509
1510       if (conn->scheme == SCHEME_HTTPS)
1511         {
1512           if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1513             {
1514               fd_close (sock);
1515               return CONSSLERR;
1516             }
1517           using_ssl = true;
1518         }
1519 #endif /* HAVE_SSL */
1520     }
1521
1522   /* Send the request to server.  */
1523   write_error = request_send (req, sock);
1524
1525   if (write_error >= 0)
1526     {
1527       if (opt.post_data)
1528         {
1529           DEBUGP (("[POST data: %s]\n", opt.post_data));
1530           write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1531         }
1532       else if (opt.post_file_name && post_data_size != 0)
1533         write_error = post_file (sock, opt.post_file_name, post_data_size);
1534     }
1535
1536   if (write_error < 0)
1537     {
1538       logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1539                  strerror (errno));
1540       CLOSE_INVALIDATE (sock);
1541       request_free (req);
1542       return WRITEFAILED;
1543     }
1544   logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1545              proxy ? "Proxy" : "HTTP");
1546   contlen = -1;
1547   contrange = 0;
1548   *dt &= ~RETROKF;
1549
1550   head = read_http_response_head (sock);
1551   if (!head)
1552     {
1553       if (errno == 0)
1554         {
1555           logputs (LOG_NOTQUIET, _("No data received.\n"));
1556           CLOSE_INVALIDATE (sock);
1557           request_free (req);
1558           return HEOF;
1559         }
1560       else
1561         {
1562           logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1563                      strerror (errno));
1564           CLOSE_INVALIDATE (sock);
1565           request_free (req);
1566           return HERR;
1567         }
1568     }
1569   DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1570
1571   resp = resp_new (head);
1572
1573   /* Check for status line.  */
1574   message = NULL;
1575   statcode = resp_status (resp, &message);
1576   if (!opt.server_response)
1577     logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1578                message ? escnonprint (message) : "");
1579   else
1580     {
1581       logprintf (LOG_VERBOSE, "\n");
1582       print_server_response (resp, "  ");
1583     }
1584
1585   if (!opt.ignore_length
1586       && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1587     {
1588       wgint parsed;
1589       errno = 0;
1590       parsed = str_to_wgint (hdrval, NULL, 10);
1591       if (parsed == WGINT_MAX && errno == ERANGE)
1592         /* Out of range.
1593            #### If Content-Length is out of range, it most likely
1594            means that the file is larger than 2G and that we're
1595            compiled without LFS.  In that case we should probably
1596            refuse to even attempt to download the file.  */
1597         contlen = -1;
1598       else
1599         contlen = parsed;
1600     }
1601
1602   /* Check for keep-alive related responses. */
1603   if (!inhibit_keep_alive && contlen != -1)
1604     {
1605       if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1606         keep_alive = true;
1607       else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1608         {
1609           if (0 == strcasecmp (hdrval, "Keep-Alive"))
1610             keep_alive = true;
1611         }
1612     }
1613   if (keep_alive)
1614     /* The server has promised that it will not close the connection
1615        when we're done.  This means that we can register it.  */
1616     register_persistent (conn->host, conn->port, sock, using_ssl);
1617
1618   if (statcode == HTTP_STATUS_UNAUTHORIZED)
1619     {
1620       /* Authorization is required.  */
1621       if (keep_alive && !head_only && skip_short_body (sock, contlen))
1622         CLOSE_FINISH (sock);
1623       else
1624         CLOSE_INVALIDATE (sock);
1625       pconn.authorized = false;
1626       if (!auth_finished && (user && passwd))
1627         {
1628           /* IIS sends multiple copies of WWW-Authenticate, one with
1629              the value "negotiate", and other(s) with data.  Loop over
1630              all the occurrences and pick the one we recognize.  */
1631           int wapos;
1632           const char *wabeg, *waend;
1633           char *www_authenticate = NULL;
1634           for (wapos = 0;
1635                (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1636                                             &wabeg, &waend)) != -1;
1637                ++wapos)
1638             if (known_authentication_scheme_p (wabeg, waend))
1639               {
1640                 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1641                 break;
1642               }
1643
1644           if (!www_authenticate)
1645             /* If the authentication header is missing or
1646                unrecognized, there's no sense in retrying.  */
1647             logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1648           else if (BEGINS_WITH (www_authenticate, "Basic"))
1649             /* If the authentication scheme is "Basic", which we send
1650                by default, there's no sense in retrying either.  (This
1651                should be changed when we stop sending "Basic" data by
1652                default.)  */
1653             ;
1654           else
1655             {
1656               char *pth;
1657               pth = url_full_path (u);
1658               request_set_header (req, "Authorization",
1659                                   create_authorization_line (www_authenticate,
1660                                                              user, passwd,
1661                                                              request_method (req),
1662                                                              pth,
1663                                                              &auth_finished),
1664                                   rel_value);
1665               if (BEGINS_WITH (www_authenticate, "NTLM"))
1666                 ntlm_seen = true;
1667               xfree (pth);
1668               goto retry_with_auth;
1669             }
1670         }
1671       logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1672       request_free (req);
1673       return AUTHFAILED;
1674     }
1675   else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1676     {
1677       /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1678       if (ntlm_seen)
1679         pconn.authorized = true;
1680     }
1681   request_free (req);
1682
1683   hs->statcode = statcode;
1684   if (statcode == -1)
1685     hs->error = xstrdup (_("Malformed status line"));
1686   else if (!*message)
1687     hs->error = xstrdup (_("(no description)"));
1688   else
1689     hs->error = xstrdup (message);
1690   xfree (message);
1691
1692   type = resp_header_strdup (resp, "Content-Type");
1693   if (type)
1694     {
1695       char *tmp = strchr (type, ';');
1696       if (tmp)
1697         {
1698           while (tmp > type && ISSPACE (tmp[-1]))
1699             --tmp;
1700           *tmp = '\0';
1701         }
1702     }
1703   hs->newloc = resp_header_strdup (resp, "Location");
1704   hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1705
1706   /* Handle (possibly multiple instances of) the Set-Cookie header. */
1707   if (opt.cookies)
1708     {
1709       int scpos;
1710       const char *scbeg, *scend;
1711       /* The jar should have been created by now. */
1712       assert (wget_cookie_jar != NULL);
1713       for (scpos = 0;
1714            (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1715                                         &scbeg, &scend)) != -1;
1716            ++scpos)
1717         {
1718           char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1719           cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
1720                                     u->path, set_cookie);
1721         }
1722     }
1723
1724   if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1725     {
1726       wgint first_byte_pos, last_byte_pos, entity_length;
1727       if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1728                                &entity_length))
1729         contrange = first_byte_pos;
1730     }
1731   resp_free (resp);
1732
1733   /* 20x responses are counted among successful by default.  */
1734   if (H_20X (statcode))
1735     *dt |= RETROKF;
1736
1737   /* Return if redirected.  */
1738   if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1739     {
1740       /* RFC2068 says that in case of the 300 (multiple choices)
1741          response, the server can output a preferred URL through
1742          `Location' header; otherwise, the request should be treated
1743          like GET.  So, if the location is set, it will be a
1744          redirection; otherwise, just proceed normally.  */
1745       if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1746         *dt |= RETROKF;
1747       else
1748         {
1749           logprintf (LOG_VERBOSE,
1750                      _("Location: %s%s\n"),
1751                      hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
1752                      hs->newloc ? _(" [following]") : "");
1753           if (keep_alive && !head_only && skip_short_body (sock, contlen))
1754             CLOSE_FINISH (sock);
1755           else
1756             CLOSE_INVALIDATE (sock);
1757           xfree_null (type);
1758           return NEWLOCATION;
1759         }
1760     }
1761
1762   /* If content-type is not given, assume text/html.  This is because
1763      of the multitude of broken CGI's that "forget" to generate the
1764      content-type.  */
1765   if (!type ||
1766         0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1767         0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1768     *dt |= TEXTHTML;
1769   else
1770     *dt &= ~TEXTHTML;
1771
1772   if (opt.html_extension && (*dt & TEXTHTML))
1773     /* -E / --html-extension / html_extension = on was specified, and this is a
1774        text/html file.  If some case-insensitive variation on ".htm[l]" isn't
1775        already the file's suffix, tack on ".html". */
1776     {
1777       char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
1778
1779       if (last_period_in_local_filename == NULL
1780           || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1781                || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1782         {
1783           int local_filename_len = strlen (*hs->local_file);
1784           /* Resize the local file, allowing for ".html" preceded by
1785              optional ".NUMBER".  */
1786           *hs->local_file = xrealloc (*hs->local_file,
1787                                       local_filename_len + 24 + sizeof (".html"));
1788           strcpy(*hs->local_file + local_filename_len, ".html");
1789           /* If clobbering is not allowed and the file, as named,
1790              exists, tack on ".NUMBER.html" instead. */
1791           if (!ALLOW_CLOBBER)
1792             {
1793               int ext_num = 1;
1794               do
1795                 sprintf (*hs->local_file + local_filename_len,
1796                          ".%d.html", ext_num++);
1797               while (file_exists_p (*hs->local_file));
1798             }
1799           *dt |= ADDED_HTML_EXTENSION;
1800         }
1801     }
1802
1803   if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
1804     {
1805       /* If `-c' is in use and the file has been fully downloaded (or
1806          the remote file has shrunk), Wget effectively requests bytes
1807          after the end of file and the server response with 416.  */
1808       logputs (LOG_VERBOSE, _("\
1809 \n    The file is already fully retrieved; nothing to do.\n\n"));
1810       /* In case the caller inspects. */
1811       hs->len = contlen;
1812       hs->res = 0;
1813       /* Mark as successfully retrieved. */
1814       *dt |= RETROKF;
1815       xfree_null (type);
1816       CLOSE_INVALIDATE (sock);  /* would be CLOSE_FINISH, but there
1817                                    might be more bytes in the body. */
1818       return RETRUNNEEDED;
1819     }
1820   if ((contrange != 0 && contrange != hs->restval)
1821       || (H_PARTIAL (statcode) && !contrange))
1822     {
1823       /* The Range request was somehow misunderstood by the server.
1824          Bail out.  */
1825       xfree_null (type);
1826       CLOSE_INVALIDATE (sock);
1827       return RANGEERR;
1828     }
1829   hs->contlen = contlen + contrange;
1830
1831   if (opt.verbose)
1832     {
1833       if (*dt & RETROKF)
1834         {
1835           /* No need to print this output if the body won't be
1836              downloaded at all, or if the original server response is
1837              printed.  */
1838           logputs (LOG_VERBOSE, _("Length: "));
1839           if (contlen != -1)
1840             {
1841               logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange));
1842               if (contlen + contrange >= 1024)
1843                 logprintf (LOG_VERBOSE, " (%s)",
1844                            human_readable (contlen + contrange));
1845               if (contrange)
1846                 {
1847                   if (contlen >= 1024)
1848                     logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
1849                                with_thousand_seps (contlen),
1850                                human_readable (contlen));
1851                   else
1852                     logprintf (LOG_VERBOSE, _(", %s remaining"),
1853                                with_thousand_seps (contlen));
1854                 }
1855             }
1856           else
1857             logputs (LOG_VERBOSE,
1858                      opt.ignore_length ? _("ignored") : _("unspecified"));
1859           if (type)
1860             logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
1861           else
1862             logputs (LOG_VERBOSE, "\n");
1863         }
1864     }
1865   xfree_null (type);
1866   type = NULL;                  /* We don't need it any more.  */
1867
1868   /* Return if we have no intention of further downloading.  */
1869   if (!(*dt & RETROKF) || head_only)
1870     {
1871       /* In case the caller cares to look...  */
1872       hs->len = 0;
1873       hs->res = 0;
1874       xfree_null (type);
1875       /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
1876          servers not to send body in response to a HEAD request.  If
1877          you encounter such a server (more likely a broken CGI), use
1878          `--no-http-keep-alive'.  */
1879       CLOSE_FINISH (sock);
1880       return RETRFINISHED;
1881     }
1882
1883   /* Open the local file.  */
1884   if (!output_stream)
1885     {
1886       mkalldirs (*hs->local_file);
1887       if (opt.backups)
1888         rotate_backups (*hs->local_file);
1889       if (hs->restval)
1890         fp = fopen (*hs->local_file, "ab");
1891       else if (ALLOW_CLOBBER)
1892         fp = fopen (*hs->local_file, "wb");
1893       else
1894         {
1895           fp = fopen_excl (*hs->local_file, true);
1896           if (!fp && errno == EEXIST)
1897             {
1898               /* We cannot just invent a new name and use it (which is
1899                  what functions like unique_create typically do)
1900                  because we told the user we'd use this name.
1901                  Instead, return and retry the download.  */
1902               logprintf (LOG_NOTQUIET,
1903                          _("%s has sprung into existence.\n"),
1904                          *hs->local_file);
1905               CLOSE_INVALIDATE (sock);
1906               return FOPEN_EXCL_ERR;
1907             }
1908         }
1909       if (!fp)
1910         {
1911           logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1912           CLOSE_INVALIDATE (sock);
1913           return FOPENERR;
1914         }
1915     }
1916   else
1917     fp = output_stream;
1918
1919   /* #### This confuses the timestamping code that checks for file
1920      size.  Maybe we should save some additional information?  */
1921   if (opt.save_headers)
1922     fwrite (head, 1, strlen (head), fp);
1923
1924   /* Now we no longer need to store the response header. */
1925   xfree (head);
1926
1927   /* Download the request body.  */
1928   flags = 0;
1929   if (keep_alive)
1930     flags |= rb_read_exactly;
1931   if (hs->restval > 0 && contrange == 0)
1932     /* If the server ignored our range request, instruct fd_read_body
1933        to skip the first RESTVAL bytes of body.  */
1934     flags |= rb_skip_startpos;
1935   hs->len = hs->restval;
1936   hs->rd_size = 0;
1937   hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
1938                           hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
1939                           flags);
1940
1941   if (hs->res >= 0)
1942     CLOSE_FINISH (sock);
1943   else
1944     CLOSE_INVALIDATE (sock);
1945
1946   {
1947     /* Close or flush the file.  We have to be careful to check for
1948        error here.  Checking the result of fwrite() is not enough --
1949        errors could go unnoticed!  */
1950     int flush_res;
1951     if (!output_stream)
1952       flush_res = fclose (fp);
1953     else
1954       flush_res = fflush (fp);
1955     if (flush_res == EOF)
1956       hs->res = -2;
1957   }
1958   if (hs->res == -2)
1959     return FWRITEERR;
1960   return RETRFINISHED;
1961 }
1962
1963 /* The genuine HTTP loop!  This is the part where the retrieval is
1964    retried, and retried, and retried, and...  */
1965 uerr_t
1966 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1967            int *dt, struct url *proxy)
1968 {
1969   int count;
1970   bool use_ts, got_head = false;/* time-stamping info */
1971   char *filename_plus_orig_suffix;
1972   char *local_filename = NULL;
1973   char *tms, *locf, *tmrate;
1974   uerr_t err;
1975   time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
1976   wgint local_size = 0;         /* the size of the local file */
1977   size_t filename_len;
1978   struct http_stat hstat;       /* HTTP status */
1979   struct_stat st;
1980   char *dummy = NULL;
1981
1982   /* This used to be done in main(), but it's a better idea to do it
1983      here so that we don't go through the hoops if we're just using
1984      FTP or whatever. */
1985   if (opt.cookies)
1986     {
1987       if (!wget_cookie_jar)
1988         wget_cookie_jar = cookie_jar_new ();
1989       if (opt.cookies_input && !cookies_loaded_p)
1990         {
1991           cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1992           cookies_loaded_p = true;
1993         }
1994     }
1995
1996   *newloc = NULL;
1997
1998   /* Warn on (likely bogus) wildcard usage in HTTP.  */
1999   if (has_wildcards_p (u->path))
2000     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2001
2002   xzero (hstat);
2003
2004   /* Determine the local filename.  */
2005   if (local_file && *local_file)
2006     hstat.local_file = local_file;
2007   else if (local_file && !opt.output_document)
2008     {
2009       *local_file = url_file_name (u);
2010       hstat.local_file = local_file;
2011     }
2012   else
2013     {
2014       dummy = url_file_name (u);
2015       hstat.local_file = &dummy;
2016       /* be honest about where we will save the file */
2017       if (local_file && opt.output_document)
2018         *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2019     }
2020
2021   if (!opt.output_document)
2022     locf = *hstat.local_file;
2023   else
2024     locf = opt.output_document;
2025
2026   hstat.referer = referer;
2027
2028   filename_len = strlen (*hstat.local_file);
2029   filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
2030
2031   if (opt.noclobber && file_exists_p (*hstat.local_file))
2032     {
2033       /* If opt.noclobber is turned on and file already exists, do not
2034          retrieve the file */
2035       logprintf (LOG_VERBOSE, _("\
2036 File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
2037       /* If the file is there, we suppose it's retrieved OK.  */
2038       *dt |= RETROKF;
2039
2040       /* #### Bogusness alert.  */
2041       /* If its suffix is "html" or "htm" or similar, assume text/html.  */
2042       if (has_html_suffix_p (*hstat.local_file))
2043         *dt |= TEXTHTML;
2044
2045       xfree_null (dummy);
2046       return RETROK;
2047     }
2048
2049   use_ts = false;
2050   if (opt.timestamping)
2051     {
2052       bool local_dot_orig_file_exists = false;
2053
2054       if (opt.backup_converted)
2055         /* If -K is specified, we'll act on the assumption that it was specified
2056            last time these files were downloaded as well, and instead of just
2057            comparing local file X against server file X, we'll compare local
2058            file X.orig (if extant, else X) against server file X.  If -K
2059            _wasn't_ specified last time, or the server contains files called
2060            *.orig, -N will be back to not operating correctly with -k. */
2061         {
2062           /* Would a single s[n]printf() call be faster?  --dan
2063
2064              Definitely not.  sprintf() is horribly slow.  It's a
2065              different question whether the difference between the two
2066              affects a program.  Usually I'd say "no", but at one
2067              point I profiled Wget, and found that a measurable and
2068              non-negligible amount of time was lost calling sprintf()
2069              in url.c.  Replacing sprintf with inline calls to
2070              strcpy() and number_to_string() made a difference.
2071              --hniksic */
2072           memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
2073           memcpy (filename_plus_orig_suffix + filename_len,
2074                   ".orig", sizeof (".orig"));
2075
2076           /* Try to stat() the .orig file. */
2077           if (stat (filename_plus_orig_suffix, &st) == 0)
2078             {
2079               local_dot_orig_file_exists = 1;
2080               local_filename = filename_plus_orig_suffix;
2081             }
2082         }
2083
2084       if (!local_dot_orig_file_exists)
2085         /* Couldn't stat() <file>.orig, so try to stat() <file>. */
2086         if (stat (*hstat.local_file, &st) == 0)
2087           local_filename = *hstat.local_file;
2088
2089       if (local_filename != NULL)
2090         /* There was a local file, so we'll check later to see if the version
2091            the server has is the same version we already have, allowing us to
2092            skip a download. */
2093         {
2094           use_ts = true;
2095           tml = st.st_mtime;
2096 #ifdef WINDOWS
2097           /* Modification time granularity is 2 seconds for Windows, so
2098              increase local time by 1 second for later comparison. */
2099           tml++;
2100 #endif
2101           local_size = st.st_size;
2102           got_head = false;
2103         }
2104     }
2105   /* Reset the counter.  */
2106   count = 0;
2107   *dt = 0;
2108   /* THE loop */
2109   do
2110     {
2111       /* Increment the pass counter.  */
2112       ++count;
2113       sleep_between_retrievals (count);
2114       /* Get the current time string.  */
2115       tms = time_str (NULL);
2116       /* Print fetch message, if opt.verbose.  */
2117       if (opt.verbose)
2118         {
2119           char *hurl = url_string (u, true);
2120           char tmp[256];
2121           strcpy (tmp, "        ");
2122           if (count > 1)
2123             sprintf (tmp, _("(try:%2d)"), count);
2124           logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
2125                      tms, hurl, tmp, locf);
2126 #ifdef WINDOWS
2127           ws_changetitle (hurl);
2128 #endif
2129           xfree (hurl);
2130         }
2131
2132       /* Default document type is empty.  However, if spider mode is
2133          on or time-stamping is employed, HEAD_ONLY commands is
2134          encoded within *dt.  */
2135       if (opt.spider || (use_ts && !got_head))
2136         *dt |= HEAD_ONLY;
2137       else
2138         *dt &= ~HEAD_ONLY;
2139
2140       /* Decide whether or not to restart.  */
2141       if (opt.always_rest
2142           && stat (locf, &st) == 0
2143           && S_ISREG (st.st_mode))
2144         /* When -c is used, continue from on-disk size.  (Can't use
2145            hstat.len even if count>1 because we don't want a failed
2146            first attempt to clobber existing data.)  */
2147         hstat.restval = st.st_size;
2148       else if (count > 1)
2149         /* otherwise, continue where the previous try left off */
2150         hstat.restval = hstat.len;
2151       else
2152         hstat.restval = 0;
2153
2154       /* Decide whether to send the no-cache directive.  We send it in
2155          two cases:
2156            a) we're using a proxy, and we're past our first retrieval.
2157               Some proxies are notorious for caching incomplete data, so
2158               we require a fresh get.
2159            b) caching is explicitly inhibited. */
2160       if ((proxy && count > 1)  /* a */
2161           || !opt.allow_cache   /* b */
2162           )
2163         *dt |= SEND_NOCACHE;
2164       else
2165         *dt &= ~SEND_NOCACHE;
2166
2167       /* Try fetching the document, or at least its head.  */
2168       err = gethttp (u, &hstat, dt, proxy);
2169
2170       /* It's unfortunate that wget determines the local filename before finding
2171          out the Content-Type of the file.  Barring a major restructuring of the
2172          code, we need to re-set locf here, since gethttp() may have xrealloc()d
2173          *hstat.local_file to tack on ".html". */
2174       if (!opt.output_document)
2175         locf = *hstat.local_file;
2176
2177       /* Time?  */
2178       tms = time_str (NULL);
2179       /* Get the new location (with or without the redirection).  */
2180       if (hstat.newloc)
2181         *newloc = xstrdup (hstat.newloc);
2182       switch (err)
2183         {
2184         case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2185         case CONERROR: case READERR: case WRITEFAILED:
2186         case RANGEERR: case FOPEN_EXCL_ERR:
2187           /* Non-fatal errors continue executing the loop, which will
2188              bring them to "while" statement at the end, to judge
2189              whether the number of tries was exceeded.  */
2190           free_hstat (&hstat);
2191           printwhat (count, opt.ntry);
2192           if (err == FOPEN_EXCL_ERR)
2193             {
2194               /* Re-determine the file name. */
2195               if (local_file && *local_file)
2196                 {
2197                   xfree (*local_file);
2198                   *local_file = url_file_name (u);
2199                   hstat.local_file = local_file;
2200                 }
2201               else
2202                 {
2203                   xfree (dummy);
2204                   dummy = url_file_name (u);
2205                   hstat.local_file = &dummy;
2206                 }
2207               /* be honest about where we will save the file */
2208               if (local_file && opt.output_document)
2209                 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2210               if (!opt.output_document)
2211                 locf = *hstat.local_file;
2212               else
2213                 locf = opt.output_document;
2214             }
2215           continue;
2216         case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2217         case SSLINITFAILED: case CONTNOTSUPPORTED:
2218           /* Fatal errors just return from the function.  */
2219           free_hstat (&hstat);
2220           xfree_null (dummy);
2221           return err;
2222         case FWRITEERR: case FOPENERR:
2223           /* Another fatal error.  */
2224           logputs (LOG_VERBOSE, "\n");
2225           logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2226                      *hstat.local_file, strerror (errno));
2227           free_hstat (&hstat);
2228           xfree_null (dummy);
2229           return err;
2230         case CONSSLERR:
2231           /* Another fatal error.  */
2232           logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2233           free_hstat (&hstat);
2234           xfree_null (dummy);
2235           return err;
2236         case NEWLOCATION:
2237           /* Return the new location to the caller.  */
2238           if (!hstat.newloc)
2239             {
2240               logprintf (LOG_NOTQUIET,
2241                          _("ERROR: Redirection (%d) without location.\n"),
2242                          hstat.statcode);
2243               free_hstat (&hstat);
2244               xfree_null (dummy);
2245               return WRONGCODE;
2246             }
2247           free_hstat (&hstat);
2248           xfree_null (dummy);
2249           return NEWLOCATION;
2250         case RETRUNNEEDED:
2251           /* The file was already fully retrieved. */
2252           free_hstat (&hstat);
2253           xfree_null (dummy);
2254           return RETROK;
2255         case RETRFINISHED:
2256           /* Deal with you later.  */
2257           break;
2258         default:
2259           /* All possibilities should have been exhausted.  */
2260           abort ();
2261         }
2262       if (!(*dt & RETROKF))
2263         {
2264           if (!opt.verbose)
2265             {
2266               /* #### Ugly ugly ugly! */
2267               char *hurl = url_string (u, true);
2268               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2269               xfree (hurl);
2270             }
2271           logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2272                      tms, hstat.statcode, escnonprint (hstat.error));
2273           logputs (LOG_VERBOSE, "\n");
2274           free_hstat (&hstat);
2275           xfree_null (dummy);
2276           return WRONGCODE;
2277         }
2278
2279       /* Did we get the time-stamp?  */
2280       if (!got_head)
2281         {
2282           if (opt.timestamping && !hstat.remote_time)
2283             {
2284               logputs (LOG_NOTQUIET, _("\
2285 Last-modified header missing -- time-stamps turned off.\n"));
2286             }
2287           else if (hstat.remote_time)
2288             {
2289               /* Convert the date-string into struct tm.  */
2290               tmr = http_atotm (hstat.remote_time);
2291               if (tmr == (time_t) (-1))
2292                 logputs (LOG_VERBOSE, _("\
2293 Last-modified header invalid -- time-stamp ignored.\n"));
2294             }
2295         }
2296
2297       /* The time-stamping section.  */
2298       if (use_ts)
2299         {
2300           got_head = true;
2301           *dt &= ~HEAD_ONLY;
2302           use_ts = false;               /* no more time-stamping */
2303           count = 0;            /* the retrieve count for HEAD is
2304                                    reset */
2305           if (hstat.remote_time && tmr != (time_t) (-1))
2306             {
2307               /* Now time-stamping can be used validly.  Time-stamping
2308                  means that if the sizes of the local and remote file
2309                  match, and local file is newer than the remote file,
2310                  it will not be retrieved.  Otherwise, the normal
2311                  download procedure is resumed.  */
2312               if (tml >= tmr &&
2313                   (hstat.contlen == -1 || local_size == hstat.contlen))
2314                 {
2315                   logprintf (LOG_VERBOSE, _("\
2316 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2317                              local_filename);
2318                   free_hstat (&hstat);
2319                   xfree_null (dummy);
2320                   return RETROK;
2321                 }
2322               else if (tml >= tmr)
2323                 logprintf (LOG_VERBOSE, _("\
2324 The sizes do not match (local %s) -- retrieving.\n"),
2325                            number_to_static_string (local_size));
2326               else
2327                 logputs (LOG_VERBOSE,
2328                          _("Remote file is newer, retrieving.\n"));
2329             }
2330           free_hstat (&hstat);
2331           continue;
2332         }
2333       if ((tmr != (time_t) (-1))
2334           && !opt.spider
2335           && ((hstat.len == hstat.contlen) ||
2336               ((hstat.res == 0) &&
2337                ((hstat.contlen == -1) ||
2338                 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2339         {
2340           /* #### This code repeats in http.c and ftp.c.  Move it to a
2341              function!  */
2342           const char *fl = NULL;
2343           if (opt.output_document)
2344             {
2345               if (output_stream_regular)
2346                 fl = opt.output_document;
2347             }
2348           else
2349             fl = *hstat.local_file;
2350           if (fl)
2351             touch (fl, tmr);
2352         }
2353       /* End of time-stamping section.  */
2354
2355       if (opt.spider)
2356         {
2357           logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
2358                      escnonprint (hstat.error));
2359           xfree_null (dummy);
2360           return RETROK;
2361         }
2362
2363       tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
2364
2365       if (hstat.len == hstat.contlen)
2366         {
2367           if (*dt & RETROKF)
2368             {
2369               logprintf (LOG_VERBOSE,
2370                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2371                          tms, tmrate, locf,
2372                          number_to_static_string (hstat.len),
2373                          number_to_static_string (hstat.contlen));
2374               logprintf (LOG_NONVERBOSE,
2375                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2376                          tms, u->url,
2377                          number_to_static_string (hstat.len),
2378                          number_to_static_string (hstat.contlen),
2379                          locf, count);
2380             }
2381           ++opt.numurls;
2382           total_downloaded_bytes += hstat.len;
2383
2384           /* Remember that we downloaded the file for later ".orig" code. */
2385           if (*dt & ADDED_HTML_EXTENSION)
2386             downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2387           else
2388             downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2389
2390           free_hstat (&hstat);
2391           xfree_null (dummy);
2392           return RETROK;
2393         }
2394       else if (hstat.res == 0) /* No read error */
2395         {
2396           if (hstat.contlen == -1)  /* We don't know how much we were supposed
2397                                        to get, so assume we succeeded. */
2398             {
2399               if (*dt & RETROKF)
2400                 {
2401                   logprintf (LOG_VERBOSE,
2402                              _("%s (%s) - `%s' saved [%s]\n\n"),
2403                              tms, tmrate, locf,
2404                              number_to_static_string (hstat.len));
2405                   logprintf (LOG_NONVERBOSE,
2406                              "%s URL:%s [%s] -> \"%s\" [%d]\n",
2407                              tms, u->url, number_to_static_string (hstat.len),
2408                              locf, count);
2409                 }
2410               ++opt.numurls;
2411               total_downloaded_bytes += hstat.len;
2412
2413               /* Remember that we downloaded the file for later ".orig" code. */
2414               if (*dt & ADDED_HTML_EXTENSION)
2415                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2416               else
2417                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2418
2419               free_hstat (&hstat);
2420               xfree_null (dummy);
2421               return RETROK;
2422             }
2423           else if (hstat.len < hstat.contlen) /* meaning we lost the
2424                                                  connection too soon */
2425             {
2426               logprintf (LOG_VERBOSE,
2427                          _("%s (%s) - Connection closed at byte %s. "),
2428                          tms, tmrate, number_to_static_string (hstat.len));
2429               printwhat (count, opt.ntry);
2430               free_hstat (&hstat);
2431               continue;
2432             }
2433           else if (!opt.kill_longer) /* meaning we got more than expected */
2434             {
2435               logprintf (LOG_VERBOSE,
2436                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2437                          tms, tmrate, locf,
2438                          number_to_static_string (hstat.len),
2439                          number_to_static_string (hstat.contlen));
2440               logprintf (LOG_NONVERBOSE,
2441                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2442                          tms, u->url,
2443                          number_to_static_string (hstat.len),
2444                          number_to_static_string (hstat.contlen),
2445                          locf, count);
2446               ++opt.numurls;
2447               total_downloaded_bytes += hstat.len;
2448
2449               /* Remember that we downloaded the file for later ".orig" code. */
2450               if (*dt & ADDED_HTML_EXTENSION)
2451                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2452               else
2453                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2454
2455               free_hstat (&hstat);
2456               xfree_null (dummy);
2457               return RETROK;
2458             }
2459           else                  /* the same, but not accepted */
2460             {
2461               logprintf (LOG_VERBOSE,
2462                          _("%s (%s) - Connection closed at byte %s/%s. "),
2463                          tms, tmrate,
2464                          number_to_static_string (hstat.len),
2465                          number_to_static_string (hstat.contlen));
2466               printwhat (count, opt.ntry);
2467               free_hstat (&hstat);
2468               continue;
2469             }
2470         }
2471       else                      /* now hstat.res can only be -1 */
2472         {
2473           if (hstat.contlen == -1)
2474             {
2475               logprintf (LOG_VERBOSE,
2476                          _("%s (%s) - Read error at byte %s (%s)."),
2477                          tms, tmrate, number_to_static_string (hstat.len),
2478                          strerror (errno));
2479               printwhat (count, opt.ntry);
2480               free_hstat (&hstat);
2481               continue;
2482             }
2483           else                  /* hstat.res == -1 and contlen is given */
2484             {
2485               logprintf (LOG_VERBOSE,
2486                          _("%s (%s) - Read error at byte %s/%s (%s). "),
2487                          tms, tmrate,
2488                          number_to_static_string (hstat.len),
2489                          number_to_static_string (hstat.contlen),
2490                          strerror (errno));
2491               printwhat (count, opt.ntry);
2492               free_hstat (&hstat);
2493               continue;
2494             }
2495         }
2496       /* not reached */
2497     }
2498   while (!opt.ntry || (count < opt.ntry));
2499   return TRYLIMEXC;
2500 }
2501 \f
2502 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
2503    than local timezone.
2504
2505    mktime is similar but assumes struct tm, also known as the
2506    "broken-down" form of time, is in local time zone.  mktime_from_utc
2507    uses mktime to make the conversion understanding that an offset
2508    will be introduced by the local time assumption.
2509
2510    mktime_from_utc then measures the introduced offset by applying
2511    gmtime to the initial result and applying mktime to the resulting
2512    "broken-down" form.  The difference between the two mktime results
2513    is the measured offset which is then subtracted from the initial
2514    mktime result to yield a calendar time which is the value returned.
2515
2516    tm_isdst in struct tm is set to 0 to force mktime to introduce a
2517    consistent offset (the non DST offset) since tm and tm+o might be
2518    on opposite sides of a DST change.
2519
2520    Some implementations of mktime return -1 for the nonexistent
2521    localtime hour at the beginning of DST.  In this event, use
2522    mktime(tm - 1hr) + 3600.
2523
2524    Schematically
2525      mktime(tm)   --> t+o
2526      gmtime(t+o)  --> tm+o
2527      mktime(tm+o) --> t+2o
2528      t+o - (t+2o - t+o) = t
2529
2530    Note that glibc contains a function of the same purpose named
2531    `timegm' (reverse of gmtime).  But obviously, it is not universally
2532    available, and unfortunately it is not straightforwardly
2533    extractable for use here.  Perhaps configure should detect timegm
2534    and use it where available.
2535
2536    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
2537    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
2538    Further improved by Roger with assistance from Edward J. Sabol
2539    based on input by Jamie Zawinski.  */
2540
/* Run mktime on TM and store the result in *OUT.  If mktime reports
   failure (-1), retry with the hour field decremented and add the
   hour back to the result; this covers mktime implementations that
   reject the nonexistent local hour at the start of DST.  Note that
   the retry modifies TM->tm_hour.  Returns false only when both
   attempts fail.  */
static bool
mktime_with_dst_retry (struct tm *tm, time_t *out)
{
  time_t when = mktime (tm);

  if (when == -1)
    {
      tm->tm_hour--;
      when = mktime (tm);
      if (when == -1)
        return false;
      when += 3600;
    }
  *out = when;
  return true;
}

static time_t
mktime_from_utc (struct tm *t)
{
  time_t shifted, twice_shifted;
  struct tm *broken_down;

  /* mktime treats T as local time, so this yields t+o, where o is
     the local timezone offset.  */
  if (!mktime_with_dst_retry (t, &shifted))
    return -1;                  /* can't deal with output from strptime */

  /* Break t+o down as UTC and push it through mktime once more,
     with DST forced off so the same offset is applied; the result
     is t+2o.  */
  broken_down = gmtime (&shifted);
  broken_down->tm_isdst = 0;
  if (!mktime_with_dst_retry (broken_down, &twice_shifted))
    return -1;                  /* can't deal with output from gmtime */

  /* The difference of the two results is the offset o; take it back
     out of the first result.  */
  return shifted - (twice_shifted - shifted);
}
2569
2570 /* Check whether the result of strptime() indicates success.
2571    strptime() returns the pointer to how far it got to in the string.
2572    The processing has been successful if the string is at `GMT' or
2573    `+X', or at the end of the string.
2574
2575    In extended regexp parlance, the function returns 1 if P matches
2576    "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (which strptime
2577    can return) is considered a failure and 0 is returned.  */
2578 static bool
2579 check_end (const char *p)
2580 {
2581   if (!p)
2582     return false;
2583   while (ISSPACE (*p))
2584     ++p;
2585   if (!*p
2586       || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2587       || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2588     return true;
2589   else
2590     return false;
2591 }
2592
2593 /* Convert the textual specification of time in TIME_STRING to the
2594    number of seconds since the Epoch.
2595
2596    TIME_STRING can be in any of the three formats RFC2616 allows the
2597    HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
2598    as well as the time format used in the Set-Cookie header.
2599    Timezones are ignored, and should be GMT.
2600
2601    Return the computed time_t representation, or -1 if the conversion
2602    fails.
2603
2604    This function uses strptime with various string formats for parsing
2605    TIME_STRING.  This results in a parser that is not as lenient in
2606    interpreting TIME_STRING as I would like it to be.  Being based on
2607    strptime, it always allows shortened months, one-digit days, etc.,
2608    but due to the multitude of formats in which time can be
2609    represented, an ideal HTTP time parser would be even more
2610    forgiving.  It should completely ignore things like week days and
2611    concentrate only on the various forms of representing years,
2612    months, days, hours, minutes, and seconds.  For example, it would
2613    be nice if it accepted ISO 8601 out of the box.
2614
2615    I've investigated free and PD code for this purpose, but none was
2616    usable.  getdate was big and unwieldy, and had potential copyright
2617    issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
2618    distributed with phttpd, is excellent, but we cannot use it because
2619    it is not assigned to the FSF.  So I stuck with strptime.  */
2620
time_t
http_atotm (const char *time_string)
{
  /* NOTE: the Solaris strptime man page says %n and %t match white
     space, but those are not available everywhere.  A plain ` ' in
     the format is used instead to mean "skip all WS", which works
     with every strptime implementation tested.  */

  static const char *formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  int i;

  /* Try each known format in turn; the first one that parses up to
     an acceptable end of string wins.  */
  for (i = 0; i < countof (formats); i++)
    {
      struct tm parsed;
      const char *rest;

      /* Some strptime versions recalculate the date from whatever is
         already in struct tm, so start from all zeros rather than
         stack garbage.  */
      xzero (parsed);

      /* Solaris strptime does not recognize English month names in
         non-English locales.  That is worked around by not setting
         the LC_TIME category; switching the locale to C around the
         call would also work, but is slow and messy.  GNU strptime
         accepts English month names alongside the local ones.  */
      rest = strptime (time_string, formats[i], &parsed);
      if (!check_end (rest))
        continue;

      return mktime_from_utc (&parsed);
    }

  /* None of the formats matched.  */
  return -1;
}
2662 \f
2663 /* Authorization support: We support three authorization schemes:
2664
2665    * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
2666
2667    * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
2668    consisting of answering to the server's challenge with the proper
2669    MD5 digests.
2670
2671    * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
2672    Stenberg for libcurl.  Like digest, NTLM is based on a
2673    challenge-response mechanism, but unlike digest, it is non-standard
2674    (authenticates TCP connections rather than requests), undocumented
2675    and Microsoft-specific.  */
2676
/* Produce the contents of the authentication header for the `Basic'
   scheme: the literal "Basic " followed by the base64 encoding of
   "USER:PASSWD".  The result is whatever concat_strings returns; the
   intermediate buffers live on the stack (alloca).  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD", not counting the terminating \0.  */
  int credlen = strlen (user) + 1 + strlen (passwd);
  char *credentials = (char *) alloca (credlen + 1);
  char *encoded = (char *) alloca (BASE64_LENGTH (credlen) + 1);

  sprintf (credentials, "%s:%s", user, passwd);
  base64_encode (credentials, credlen, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
2695
/* Advance the pointer lvalue X past any whitespace it currently
   points at.  X is evaluated more than once, so it must be free of
   side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
2700
2701 #ifdef ENABLE_DIGEST
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of a field in such a header.  If the field is the
   one named by ATTR_NAME ("realm", "opaque", and "nonce" are used by
   the current digest authorization code) and has the form
   NAME = "VALUE", the value between the quotes is copied with
   strdupdelim into the (char *) variable pointed to by RET, after
   any previous value there has been released with xfree_null.

   Returns:
     - a negative number if the field is ATTR_NAME but is malformed
       (no `=', no opening quote, or no closing quote);
     - 0 if the field is not ATTR_NAME, including a field whose name
       merely begins with ATTR_NAME (e.g. "realm2");
     - otherwise the number of bytes parsed by this call, counted
       from AU through the closing quote.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  const char *ep;
  const char *cp = au;
  size_t namelen = strlen (attr_name);

  if (strncmp (cp, attr_name, namelen) != 0)
    return 0;

  cp += namelen;
  if (!*cp)
    return -1;

  /* A prefix match is not enough: the name must end here, i.e. be
     followed by `=' or by whitespace leading up to it.  Otherwise
     this is some other attribute that happens to start with
     ATTR_NAME, which the caller should skip rather than treat the
     whole header as malformed.  */
  if (*cp != '=' && !ISSPACE (*cp))
    return 0;

  SKIP_WS (cp);
  if (*cp != '=')
    return -1;
  if (!*++cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '\"')
    return -1;
  if (!*++cp)
    return -1;

  /* Find the closing quote; CP now points at the first value byte.  */
  for (ep = cp; *ep && *ep != '\"'; ep++)
    ;
  if (!*ep)
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (cp, ep);
  return ep - au + 1;
}
2740
/* Write the hexadecimal representation of HASH into BUF.  HASH is
   read as MD5_HASHLEN bytes (described by the original author as an
   array of 16 bytes containing the hash keys); BUF must have room
   for two hex digits per hash byte plus the terminating \0, i.e. 33
   writable characters.  */
static void
dump_hash (unsigned char *buf, const unsigned char *hash)
{
  int i;

  for (i = 0; i < MD5_HASHLEN; i++)
    {
      const unsigned char byte = hash[i];

      buf[2 * i] = XNUM_TO_digit (byte >> 4);        /* high nibble */
      buf[2 * i + 1] = XNUM_TO_digit (byte & 0xf);   /* low nibble */
    }
  buf[2 * MD5_HASHLEN] = '\0';
}
2757
2758 /* Take the line apart to find the challenge, and compose a digest
2759    authorization header.  See RFC2069 section 2.1.2.  */
2760 static char *
2761 digest_authentication_encode (const char *au, const char *user,
2762                               const char *passwd, const char *method,
2763                               const char *path)
2764 {
2765   static char *realm, *opaque, *nonce;
2766   static struct {
2767     const char *name;
2768     char **variable;
2769   } options[] = {
2770     { "realm", &realm },
2771     { "opaque", &opaque },
2772     { "nonce", &nonce }
2773   };
2774   char *res;
2775
2776   realm = opaque = nonce = NULL;
2777
2778   au += 6;                      /* skip over `Digest' */
2779   while (*au)
2780     {
2781       int i;
2782
2783       SKIP_WS (au);
2784       for (i = 0; i < countof (options); i++)
2785         {
2786           int skip = extract_header_attr (au, options[i].name,
2787                                           options[i].variable);
2788           if (skip < 0)
2789             {
2790               xfree_null (realm);
2791               xfree_null (opaque);
2792               xfree_null (nonce);
2793               return NULL;
2794             }
2795           else if (skip)
2796             {
2797               au += skip;
2798               break;
2799             }
2800         }
2801       if (i == countof (options))
2802         {
2803           while (*au && *au != '=')
2804             au++;
2805           if (*au && *++au)
2806             {
2807               SKIP_WS (au);
2808               if (*au == '\"')
2809                 {
2810                   au++;
2811                   while (*au && *au != '\"')
2812                     au++;
2813                   if (*au)
2814                     au++;
2815                 }
2816             }
2817         }
2818       while (*au && *au != ',')
2819         au++;
2820       if (*au)
2821         au++;
2822     }
2823   if (!realm || !nonce || !user || !passwd || !path || !method)
2824     {
2825       xfree_null (realm);
2826       xfree_null (opaque);
2827       xfree_null (nonce);
2828       return NULL;
2829     }
2830
2831   /* Calculate the digest value.  */
2832   {
2833     ALLOCA_MD5_CONTEXT (ctx);
2834     unsigned char hash[MD5_HASHLEN];
2835     unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2836     unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2837
2838     /* A1BUF = H(user ":" realm ":" password) */
2839     gen_md5_init (ctx);
2840     gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2841     gen_md5_update ((unsigned char *)":", 1, ctx);
2842     gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2843     gen_md5_update ((unsigned char *)":", 1, ctx);
2844     gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2845     gen_md5_finish (ctx, hash);
2846     dump_hash (a1buf, hash);
2847
2848     /* A2BUF = H(method ":" path) */
2849     gen_md5_init (ctx);
2850     gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2851     gen_md5_update ((unsigned char *)":", 1, ctx);
2852     gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2853     gen_md5_finish (ctx, hash);
2854     dump_hash (a2buf, hash);
2855
2856     /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2857     gen_md5_init (ctx);
2858     gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2859     gen_md5_update ((unsigned char *)":", 1, ctx);
2860     gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2861     gen_md5_update ((unsigned char *)":", 1, ctx);
2862     gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2863     gen_md5_finish (ctx, hash);
2864     dump_hash (response_digest, hash);
2865
2866     res = xmalloc (strlen (user)
2867                    + strlen (user)
2868                    + strlen (realm)
2869                    + strlen (nonce)
2870                    + strlen (path)
2871                    + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2872                    + (opaque ? strlen (opaque) : 0)
2873                    + 128);
2874     sprintf (res, "Digest \
2875 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2876              user, realm, nonce, path, response_digest);
2877     if (opaque)
2878       {
2879         char *p = res + strlen (res);
2880         strcat (p, ", opaque=\"");
2881         strcat (p, opaque);
2882         strcat (p, "\"");
2883       }
2884   }
2885   return res;
2886 }
2887 #endif /* ENABLE_DIGEST */
2888
/* Length of a string literal.  sizeof counts the terminating \0, so
   one is subtracted.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* True if the chars in [b, e) begin with the string literal given as
   the first argument, and that prefix is followed either by
   whitespace or by the end of the range.  The comparison ignores
   case.  */
#define STARTS(literal, b, e)                                   \
  ((e) - (b) >= STRSIZE (literal)                               \
   && strncasecmp ((b), literal, STRSIZE (literal)) == 0        \
   && ((e) - (b) == STRSIZE (literal)                           \
       || ISSPACE ((b)[STRSIZE (literal)])))

/* Return true if the header value in [HDRBEG, HDREND) starts with the
   name of an authentication scheme this build can answer: `Basic'
   always, `Digest' when ENABLE_DIGEST is defined, and `NTLM' when
   ENABLE_NTLM is defined.  */
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return true;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return true;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return true;
#endif
  return false;
}

#undef STARTS
2916
/* Create the HTTP authorization request header in response to the
   `WWW-Authenticate' value AU.  The scheme named at the start of AU
   selects the encoder: `Basic' always, `Digest' when ENABLE_DIGEST
   is defined, and `NTLM' when ENABLE_NTLM is defined.

   *FINISHED is set to true for Basic and Digest, and for NTLM when
   ntlm_input rejects AU (in which case NULL is returned); otherwise
   FINISHED is handed to ntlm_output.  The return value is whatever
   the selected encoder returns.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, bool *finished)
{
  /* Callers pass only schemes approved by
     known_authentication_scheme_p, so the first letter is enough to
     tell them apart.  */
  const int scheme = TOUPPER (*au);

  if (scheme == 'B')            /* Basic */
    {
      *finished = true;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (scheme == 'D')            /* Digest */
    {
      *finished = true;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (scheme == 'N')            /* NTLM */
    {
      if (!ntlm_input (&pconn.ntlm, au))
        {
          *finished = true;
          return NULL;
        }
      return ntlm_output (&pconn.ntlm, user, passwd, finished);
    }
#endif

  /* We shouldn't get here -- this function should be only called
     with values approved by known_authentication_scheme_p.  */
  abort ();
}
2954 \f
2955 void
2956 save_cookies (void)
2957 {
2958   if (wget_cookie_jar)
2959     cookie_jar_save (wget_cookie_jar, opt.cookies_output);
2960 }
2961
2962 void
2963 http_cleanup (void)
2964 {
2965   xfree_null (pconn.host);
2966   if (wget_cookie_jar)
2967     cookie_jar_delete (wget_cookie_jar);
2968 }