sjero.net Git - wget/blob - src/http.c

   1 /* HTTP support.
   2    Copyright (C) 1996-2005 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9  (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software Foundation, Inc.,
  18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif
  38 #include <assert.h>
  39 #include <errno.h>
  40 #include <time.h>
  41 #include <locale.h>
  42
  43 #include "wget.h"
  44 #include "http.h"
  45 #include "utils.h"
  46 #include "url.h"
  47 #include "host.h"
  48 #include "retr.h"
  49 #include "connect.h"
  50 #include "netrc.h"
  51 #ifdef HAVE_SSL
  52 # include "ssl.h"
  53 #endif
  54 #ifdef ENABLE_NTLM
  55 # include "http-ntlm.h"
  56 #endif
  57 #include "cookies.h"
  58 #ifdef ENABLE_DIGEST
  59 # include "gen-md5.h"
  60 #endif
  61 #include "convert.h"
  62
  63 extern char *version_string;
  64
  65 #ifndef MIN
  66 # define MIN(x, y) ((x) > (y) ? (y) : (x))
  67 #endif
  68
  69 \f
  70 static bool cookies_loaded_p;
  71 static struct cookie_jar *wget_cookie_jar;
  72
  73 #define TEXTHTML_S "text/html"
  74 #define TEXTXHTML_S "application/xhtml+xml"
  75
  76 /* Some status code validation macros: */
  77 #define H_20X(x)        (((x) >= 200) && ((x) < 300))
  78 #define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
  79 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY           \
  80                          || (x) == HTTP_STATUS_MOVED_TEMPORARILY        \
  81                          || (x) == HTTP_STATUS_SEE_OTHER                \
  82                          || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
  83
  84 /* HTTP/1.0 status codes from RFC1945, provided for reference.  */
  85 /* Successful 2xx.  */
  86 #define HTTP_STATUS_OK                  200
  87 #define HTTP_STATUS_CREATED             201
  88 #define HTTP_STATUS_ACCEPTED            202
  89 #define HTTP_STATUS_NO_CONTENT          204
  90 #define HTTP_STATUS_PARTIAL_CONTENTS    206
  91
  92 /* Redirection 3xx.  */
  93 #define HTTP_STATUS_MULTIPLE_CHOICES    300
  94 #define HTTP_STATUS_MOVED_PERMANENTLY   301
  95 #define HTTP_STATUS_MOVED_TEMPORARILY   302
  96 #define HTTP_STATUS_SEE_OTHER           303 /* from HTTP/1.1 */
  97 #define HTTP_STATUS_NOT_MODIFIED        304
  98 #define HTTP_STATUS_TEMPORARY_REDIRECT  307 /* from HTTP/1.1 */
  99
 100 /* Client error 4xx.  */
 101 #define HTTP_STATUS_BAD_REQUEST         400
 102 #define HTTP_STATUS_UNAUTHORIZED        401
 103 #define HTTP_STATUS_FORBIDDEN           403
 104 #define HTTP_STATUS_NOT_FOUND           404
 105 #define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
 106
 107 /* Server errors 5xx.  */
 108 #define HTTP_STATUS_INTERNAL            500
 109 #define HTTP_STATUS_NOT_IMPLEMENTED     501
 110 #define HTTP_STATUS_BAD_GATEWAY         502
 111 #define HTTP_STATUS_UNAVAILABLE         503
 112 \f
 113 enum rp {
 114   rel_none, rel_name, rel_value, rel_both
 115 };
 116
 117 struct request {
 118   const char *method;
 119   char *arg;
 120
 121   struct request_header {
 122     char *name, *value;
 123     enum rp release_policy;
 124   } *headers;
 125   int hcount, hcapacity;
 126 };
 127
 128 /* Create a new, empty request.  At least request_set_method must be
 129    called before the request can be used.  */
 130
 131 static struct request *
 132 request_new (void)
 133 {
 134   struct request *req = xnew0 (struct request);
 135   req->hcapacity = 8;
 136   req->headers = xnew_array (struct request_header, req->hcapacity);
 137   return req;
 138 }
 139
 140 /* Set the request's method and its arguments.  METH should be a
 141    literal string (or it should outlive the request) because it will
 142    not be freed.  ARG will be freed by request_free.  */
 143
 144 static void
 145 request_set_method (struct request *req, const char *meth, char *arg)
 146 {
 147   req->method = meth;
 148   req->arg = arg;
 149 }
 150
 151 /* Return the method string passed with the last call to
 152    request_set_method.  */
 153
 154 static const char *
 155 request_method (const struct request *req)
 156 {
 157   return req->method;
 158 }
 159
 160 /* Free one header according to the release policy specified with
 161    request_set_header.  */
 162
 163 static void
 164 release_header (struct request_header *hdr)
 165 {
 166   switch (hdr->release_policy)
 167     {
 168     case rel_none:
 169       break;
 170     case rel_name:
 171       xfree (hdr->name);
 172       break;
 173     case rel_value:
 174       xfree (hdr->value);
 175       break;
 176     case rel_both:
 177       xfree (hdr->name);
 178       xfree (hdr->value);
 179       break;
 180     }
 181 }
 182
 183 /* Set the request named NAME to VALUE.  Specifically, this means that
 184    a "NAME: VALUE\r\n" header line will be used in the request.  If a
 185    header with the same name previously existed in the request, its
 186    value will be replaced by this one.  A NULL value means do nothing.
 187
 188    RELEASE_POLICY determines whether NAME and VALUE should be released
 189    (freed) with request_free.  Allowed values are:
 190
 191     - rel_none     - don't free NAME or VALUE
 192     - rel_name     - free NAME when done
 193     - rel_value    - free VALUE when done
 194     - rel_both     - free both NAME and VALUE when done
 195
 196    Setting release policy is useful when arguments come from different
 197    sources.  For example:
 198
 199      // Don't free literal strings!
 200      request_set_header (req, "Pragma", "no-cache", rel_none);
 201
 202      // Don't free a global variable, we'll need it later.
 203      request_set_header (req, "Referer", opt.referer, rel_none);
 204
 205      // Value freshly allocated, free it when done.
 206      request_set_header (req, "Range",
 207                          aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
 208                          rel_value);
 209    */
 210
 211 static void
 212 request_set_header (struct request *req, char *name, char *value,
 213                     enum rp release_policy)
 214 {
 215   struct request_header *hdr;
 216   int i;
 217
 218   if (!value)
 219     {
 220       /* A NULL value is a no-op; if freeing the name is requested,
 221          free it now to avoid leaks.  */
 222       if (release_policy == rel_name || release_policy == rel_both)
 223         xfree (name);
 224       return;
 225     }
 226
 227   for (i = 0; i < req->hcount; i++)
 228     {
 229       hdr = &req->headers[i];
 230       if (0 == strcasecmp (name, hdr->name))
 231         {
 232           /* Replace existing header. */
 233           release_header (hdr);
 234           hdr->name = name;
 235           hdr->value = value;
 236           hdr->release_policy = release_policy;
 237           return;
 238         }
 239     }
 240
 241   /* Install new header. */
 242
 243   if (req->hcount >= req->hcapacity)
 244     {
 245       req->hcapacity <<= 1;
 246       req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
 247     }
 248   hdr = &req->headers[req->hcount++];
 249   hdr->name = name;
 250   hdr->value = value;
 251   hdr->release_policy = release_policy;
 252 }
 253
 254 /* Like request_set_header, but sets the whole header line, as
 255    provided by the user using the `--header' option.  For example,
 256    request_set_user_header (req, "Foo: bar") works just like
 257    request_set_header (req, "Foo", "bar").  */
 258
 259 static void
 260 request_set_user_header (struct request *req, const char *header)
 261 {
 262   char *name;
 263   const char *p = strchr (header, ':');
 264   if (!p)
 265     return;
 266   BOUNDED_TO_ALLOCA (header, p, name);
 267   ++p;
 268   while (ISSPACE (*p))
 269     ++p;
 270   request_set_header (req, xstrdup (name), (char *) p, rel_name);
 271 }
 272
 273 /* Remove the header with specified name from REQ.  Returns true if
 274    the header was actually removed, false otherwise.  */
 275
 276 static bool
 277 request_remove_header (struct request *req, char *name)
 278 {
 279   int i;
 280   for (i = 0; i < req->hcount; i++)
 281     {
 282       struct request_header *hdr = &req->headers[i];
 283       if (0 == strcasecmp (name, hdr->name))
 284         {
 285           release_header (hdr);
 286           /* Move the remaining headers by one. */
 287           if (i < req->hcount - 1)
 288             memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
 289           --req->hcount;
 290           return true;
 291         }
 292     }
 293   return false;
 294 }
 295
 296 #define APPEND(p, str) do {                     \
 297   int A_len = strlen (str);                     \
 298   memcpy (p, str, A_len);                       \
 299   p += A_len;                                   \
 300 } while (0)
 301
 302 /* Construct the request and write it to FD using fd_write.  */
 303
 304 static int
 305 request_send (const struct request *req, int fd)
 306 {
 307   char *request_string, *p;
 308   int i, size, write_error;
 309
 310   /* Count the request size. */
 311   size = 0;
 312
 313   /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
 314   size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
 315
 316   for (i = 0; i < req->hcount; i++)
 317     {
 318       struct request_header *hdr = &req->headers[i];
 319       /* NAME ": " VALUE "\r\n" */
 320       size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
 321     }
 322
 323   /* "\r\n\0" */
 324   size += 3;
 325
 326   p = request_string = alloca_array (char, size);
 327
 328   /* Generate the request. */
 329
 330   APPEND (p, req->method); *p++ = ' ';
 331   APPEND (p, req->arg);    *p++ = ' ';
 332   memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
 333
 334   for (i = 0; i < req->hcount; i++)
 335     {
 336       struct request_header *hdr = &req->headers[i];
 337       APPEND (p, hdr->name);
 338       *p++ = ':', *p++ = ' ';
 339       APPEND (p, hdr->value);
 340       *p++ = '\r', *p++ = '\n';
 341     }
 342
 343   *p++ = '\r', *p++ = '\n', *p++ = '\0';
 344   assert (p - request_string == size);
 345
 346 #undef APPEND
 347
 348   DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
 349
 350   /* Send the request to the server. */
 351
 352   write_error = fd_write (fd, request_string, size - 1, -1);
 353   if (write_error < 0)
 354     logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
 355                fd_errstr (fd));
 356   return write_error;
 357 }
 358
 359 /* Release the resources used by REQ. */
 360
 361 static void
 362 request_free (struct request *req)
 363 {
 364   int i;
 365   xfree_null (req->arg);
 366   for (i = 0; i < req->hcount; i++)
 367     release_header (&req->headers[i]);
 368   xfree_null (req->headers);
 369   xfree (req);
 370 }
 371
 372 /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
 373    PROMISED_SIZE bytes are sent over the wire -- if the file is
 374    longer, read only that much; if the file is shorter, report an error.  */
 375
 376 static int
 377 post_file (int sock, const char *file_name, wgint promised_size)
 378 {
 379   static char chunk[8192];
 380   wgint written = 0;
 381   int write_error;
 382   FILE *fp;
 383
 384   DEBUGP (("[writing POST file %s ... ", file_name));
 385
 386   fp = fopen (file_name, "rb");
 387   if (!fp)
 388     return -1;
 389   while (!feof (fp) && written < promised_size)
 390     {
 391       int towrite;
 392       int length = fread (chunk, 1, sizeof (chunk), fp);
 393       if (length == 0)
 394         break;
 395       towrite = MIN (promised_size - written, length);
 396       write_error = fd_write (sock, chunk, towrite, -1);
 397       if (write_error < 0)
 398         {
 399           fclose (fp);
 400           return -1;
 401         }
 402       written += towrite;
 403     }
 404   fclose (fp);
 405
 406   /* If we've written less than was promised, report a (probably
 407      nonsensical) error rather than break the promise.  */
 408   if (written < promised_size)
 409     {
 410       errno = EINVAL;
 411       return -1;
 412     }
 413
 414   assert (written == promised_size);
 415   DEBUGP (("done]\n"));
 416   return 0;
 417 }
 418 \f
 419 /* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
 420    If so, return the pointer to the position after the line, otherwise
 421    return NULL.  This is used as callback to fd_read_hunk.  The data
 422    between START and PEEKED has been read and cannot be "unread"; the
 423    data after PEEKED has only been peeked.  */
 424
 425 static const char *
 426 response_head_terminator (const char *start, const char *peeked, int peeklen)
 427 {
 428   const char *p, *end;
 429
 430   /* If at first peek, verify whether HUNK starts with "HTTP".  If
 431      not, this is a HTTP/0.9 request and we must bail out without
 432      reading anything.  */
 433   if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
 434     return start;
 435
 436   /* Look for "\n[\r]\n", and return the following position if found.
 437      Start two chars before the current to cover the possibility that
 438      part of the terminator (e.g. "\n\r") arrived in the previous
 439      batch.  */
 440   p = peeked - start < 2 ? start : peeked - 2;
 441   end = peeked + peeklen;
 442
 443   /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
 444   for (; p < end - 2; p++)
 445     if (*p == '\n')
 446       {
 447         if (p[1] == '\r' && p[2] == '\n')
 448           return p + 3;
 449         else if (p[1] == '\n')
 450           return p + 2;
 451       }
 452   /* p==end-2: check for \n\n directly preceding END. */
 453   if (p[0] == '\n' && p[1] == '\n')
 454     return p + 2;
 455
 456   return NULL;
 457 }
 458
 459 /* The maximum size of a single HTTP response we care to read.  Rather
 460    than being a limit of the reader implementation, this limit
 461    prevents Wget from slurping all available memory upon encountering
 462    malicious or buggy server output, thus protecting the user.  Define
 463    it to 0 to remove the limit.  */
 464
 465 #define HTTP_RESPONSE_MAX_SIZE 65536
 466
 467 /* Read the HTTP request head from FD and return it.  The error
 468    conditions are the same as with fd_read_hunk.
 469
 470    To support HTTP/0.9 responses, this function tries to make sure
 471    that the data begins with "HTTP".  If this is not the case, no data
 472    is read and an empty request is returned, so that the remaining
 473    data can be treated as body.  */
 474
 475 static char *
 476 read_http_response_head (int fd)
 477 {
 478   return fd_read_hunk (fd, response_head_terminator, 512,
 479                        HTTP_RESPONSE_MAX_SIZE);
 480 }
 481
 482 struct response {
 483   /* The response data. */
 484   const char *data;
 485
 486   /* The array of pointers that indicate where each header starts.
 487      For example, given this HTTP response:
 488
 489        HTTP/1.0 200 Ok
 490        Description: some
 491         text
 492        Etag: x
 493
 494      The headers are located like this:
 495
 496      "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
 497      ^                   ^                             ^          ^
 498      headers[0]          headers[1]                    headers[2] headers[3]
 499
 500      I.e. headers[0] points to the beginning of the request,
 501      headers[1] points to the end of the first header and the
 502      beginning of the second one, etc.  */
 503
 504   const char **headers;
 505 };
 506
 507 /* Create a new response object from the text of the HTTP response,
 508    available in HEAD.  That text is automatically split into
 509    constituent header lines for fast retrieval using
 510    resp_header_*.  */
 511
 512 static struct response *
 513 resp_new (const char *head)
 514 {
 515   const char *hdr;
 516   int count, size;
 517
 518   struct response *resp = xnew0 (struct response);
 519   resp->data = head;
 520
 521   if (*head == '\0')
 522     {
 523       /* Empty head means that we're dealing with a headerless
 524          (HTTP/0.9) response.  In that case, don't set HEADERS at
 525          all.  */
 526       return resp;
 527     }
 528
 529   /* Split HEAD into header lines, so that resp_header_* functions
 530      don't need to do this over and over again.  */
 531
 532   size = count = 0;
 533   hdr = head;
 534   while (1)
 535     {
 536       DO_REALLOC (resp->headers, size, count + 1, const char *);
 537       resp->headers[count++] = hdr;
 538
 539       /* Break upon encountering an empty line. */
 540       if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
 541         break;
 542
 543       /* Find the end of HDR, including continuations. */
 544       do
 545         {
 546           const char *end = strchr (hdr, '\n');
 547           if (end)
 548             hdr = end + 1;
 549           else
 550             hdr += strlen (hdr);
 551         }
 552       while (*hdr == ' ' || *hdr == '\t');
 553     }
 554   DO_REALLOC (resp->headers, size, count + 1, const char *);
 555   resp->headers[count] = NULL;
 556
 557   return resp;
 558 }
 559
 560 /* Locate the header named NAME in the request data, starting with
 561    position START.  This allows the code to loop through the request
 562    data, filtering for all requests of a given name.  Returns the
 563    found position, or -1 for failure.  The code that uses this
 564    function typically looks like this:
 565
 566      for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
 567        ... do something with header ...
 568
 569    If you only care about one header, use resp_header_get instead of
 570    this function.  */
 571
 572 static int
 573 resp_header_locate (const struct response *resp, const char *name, int start,
 574                     const char **begptr, const char **endptr)
 575 {
 576   int i;
 577   const char **headers = resp->headers;
 578   int name_len;
 579
 580   if (!headers || !headers[1])
 581     return -1;
 582
 583   name_len = strlen (name);
 584   if (start > 0)
 585     i = start;
 586   else
 587     i = 1;
 588
 589   for (; headers[i + 1]; i++)
 590     {
 591       const char *b = headers[i];
 592       const char *e = headers[i + 1];
 593       if (e - b > name_len
 594           && b[name_len] == ':'
 595           && 0 == strncasecmp (b, name, name_len))
 596         {
 597           b += name_len + 1;
 598           while (b < e && ISSPACE (*b))
 599             ++b;
 600           while (b < e && ISSPACE (e[-1]))
 601             --e;
 602           *begptr = b;
 603           *endptr = e;
 604           return i;
 605         }
 606     }
 607   return -1;
 608 }
 609
 610 /* Find and retrieve the header named NAME in the request data.  If
 611    found, set *BEGPTR to its starting, and *ENDPTR to its ending
 612    position, and return true.  Otherwise return false.
 613
 614    This function is used as a building block for resp_header_copy
 615    and resp_header_strdup.  */
 616
 617 static bool
 618 resp_header_get (const struct response *resp, const char *name,
 619                  const char **begptr, const char **endptr)
 620 {
 621   int pos = resp_header_locate (resp, name, 0, begptr, endptr);
 622   return pos != -1;
 623 }
 624
 625 /* Copy the response header named NAME to buffer BUF, no longer than
 626    BUFSIZE (BUFSIZE includes the terminating 0).  If the header
 627    exists, true is returned, false otherwise.  If there should be no
 628    limit on the size of the header, use resp_header_strdup instead.
 629
 630    If BUFSIZE is 0, no data is copied, but the boolean indication of
 631    whether the header is present is still returned.  */
 632
 633 static bool
 634 resp_header_copy (const struct response *resp, const char *name,
 635                   char *buf, int bufsize)
 636 {
 637   const char *b, *e;
 638   if (!resp_header_get (resp, name, &b, &e))
 639     return false;
 640   if (bufsize)
 641     {
 642       int len = MIN (e - b, bufsize - 1);
 643       memcpy (buf, b, len);
 644       buf[len] = '\0';
 645     }
 646   return true;
 647 }
 648
 649 /* Return the value of header named NAME in RESP, allocated with
 650    malloc.  If such a header does not exist in RESP, return NULL.  */
 651
 652 static char *
 653 resp_header_strdup (const struct response *resp, const char *name)
 654 {
 655   const char *b, *e;
 656   if (!resp_header_get (resp, name, &b, &e))
 657     return NULL;
 658   return strdupdelim (b, e);
 659 }
 660
 661 /* Parse the HTTP status line, which is of format:
 662
 663    HTTP-Version SP Status-Code SP Reason-Phrase
 664
 665    The function returns the status-code, or -1 if the status line
 666    appears malformed.  The pointer to "reason-phrase" message is
 667    returned in *MESSAGE.  */
 668
 669 static int
 670 resp_status (const struct response *resp, char **message)
 671 {
 672   int status;
 673   const char *p, *end;
 674
 675   if (!resp->headers)
 676     {
 677       /* For a HTTP/0.9 response, assume status 200. */
 678       if (message)
 679         *message = xstrdup (_("No headers, assuming HTTP/0.9"));
 680       return 200;
 681     }
 682
 683   p = resp->headers[0];
 684   end = resp->headers[1];
 685
 686   if (!end)
 687     return -1;
 688
 689   /* "HTTP" */
 690   if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
 691     return -1;
 692   p += 4;
 693
 694   /* Match the HTTP version.  This is optional because Gnutella
 695      servers have been reported to not specify HTTP version.  */
 696   if (p < end && *p == '/')
 697     {
 698       ++p;
 699       while (p < end && ISDIGIT (*p))
 700         ++p;
 701       if (p < end && *p == '.')
 702         ++p;
 703       while (p < end && ISDIGIT (*p))
 704         ++p;
 705     }
 706
 707   while (p < end && ISSPACE (*p))
 708     ++p;
 709   if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
 710     return -1;
 711
 712   status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
 713   p += 3;
 714
 715   if (message)
 716     {
 717       while (p < end && ISSPACE (*p))
 718         ++p;
 719       while (p < end && ISSPACE (end[-1]))
 720         --end;
 721       *message = strdupdelim (p, end);
 722     }
 723
 724   return status;
 725 }
 726
 727 /* Release the resources used by RESP.  */
 728
 729 static void
 730 resp_free (struct response *resp)
 731 {
 732   xfree_null (resp->headers);
 733   xfree (resp);
 734 }
 735
 736 /* Print the server response, line by line, omitting the trailing CRLF
 737    from individual header lines, and prefixed with PREFIX.  */
 738
 739 static void
 740 print_server_response (const struct response *resp, const char *prefix)
 741 {
 742   int i;
 743   if (!resp->headers)
 744     return;
 745   for (i = 0; resp->headers[i + 1]; i++)
 746     {
 747       const char *b = resp->headers[i];
 748       const char *e = resp->headers[i + 1];
 749       /* Skip CRLF */
 750       if (b < e && e[-1] == '\n')
 751         --e;
 752       if (b < e && e[-1] == '\r')
 753         --e;
 754       /* This is safe even on printfs with broken handling of "%.<n>s"
 755          because resp->headers ends with \0.  */
 756       logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
 757     }
 758 }
 759
 760 /* Parse the `Content-Range' header and extract the information it
 761    contains.  Returns true if successful, false otherwise.  */
 762 static bool
 763 parse_content_range (const char *hdr, wgint *first_byte_ptr,
 764                      wgint *last_byte_ptr, wgint *entity_length_ptr)
 765 {
 766   wgint num;
 767
 768   /* Ancient versions of Netscape proxy server, presumably predating
 769      rfc2068, sent out `Content-Range' without the "bytes"
 770      specifier.  */
 771   if (0 == strncasecmp (hdr, "bytes", 5))
 772     {
 773       hdr += 5;
 774       /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
 775          HTTP spec. */
 776       if (*hdr == ':')
 777         ++hdr;
 778       while (ISSPACE (*hdr))
 779         ++hdr;
 780       if (!*hdr)
 781         return false;
 782     }
 783   if (!ISDIGIT (*hdr))
 784     return false;
 785   for (num = 0; ISDIGIT (*hdr); hdr++)
 786     num = 10 * num + (*hdr - '0');
 787   if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
 788     return false;
 789   *first_byte_ptr = num;
 790   ++hdr;
 791   for (num = 0; ISDIGIT (*hdr); hdr++)
 792     num = 10 * num + (*hdr - '0');
 793   if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
 794     return false;
 795   *last_byte_ptr = num;
 796   ++hdr;
 797   for (num = 0; ISDIGIT (*hdr); hdr++)
 798     num = 10 * num + (*hdr - '0');
 799   *entity_length_ptr = num;
 800   return true;
 801 }
 802
 803 /* Read the body of the request, but don't store it anywhere and don't
 804    display a progress gauge.  This is useful for reading the bodies of
 805    administrative responses to which we will soon issue another
 806    request.  The response is not useful to the user, but reading it
 807    allows us to continue using the same connection to the server.
 808
 809    If reading fails, false is returned, true otherwise.  In debug
 810    mode, the body is displayed for debugging purposes.  */
 811
 812 static bool
 813 skip_short_body (int fd, wgint contlen)
 814 {
 815   enum {
 816     SKIP_SIZE = 512,            /* size of the download buffer */
 817     SKIP_THRESHOLD = 4096       /* the largest size we read */
 818   };
 819   char dlbuf[SKIP_SIZE + 1];
 820   dlbuf[SKIP_SIZE] = '\0';      /* so DEBUGP can safely print it */
 821
 822   /* We shouldn't get here with unknown contlen.  (This will change
 823      with HTTP/1.1, which supports "chunked" transfer.)  */
 824   assert (contlen != -1);
 825
 826   /* If the body is too large, it makes more sense to simply close the
 827      connection than to try to read the body.  */
 828   if (contlen > SKIP_THRESHOLD)
 829     return false;
 830
 831   DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
 832
 833   while (contlen > 0)
 834     {
 835       int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
 836       if (ret <= 0)
 837         {
 838           /* Don't normally report the error since this is an
 839              optimization that should be invisible to the user.  */
 840           DEBUGP (("] aborting (%s).\n",
 841                    ret < 0 ? fd_errstr (fd) : "EOF received"));
 842           return false;
 843         }
 844       contlen -= ret;
 845       /* Safe even if %.*s bogusly expects terminating \0 because
 846          we've zero-terminated dlbuf above.  */
 847       DEBUGP (("%.*s", ret, dlbuf));
 848     }
 849
 850   DEBUGP (("] done.\n"));
 851   return true;
 852 }
 853 \f
 854 /* Persistent connections.  Currently, we cache the most recently used
 855    connection as persistent, provided that the HTTP server agrees to
 856    make it such.  The persistence data is stored in the variables
 857    below.  Ideally, it should be possible to cache an arbitrary fixed
 858    number of these connections.  */
 859
 860 /* Whether a persistent connection is active. */
 861 static bool pconn_active;
 862
 863 static struct {
 864   /* The socket of the connection.  */
 865   int socket;
 866
 867   /* Host and port of the currently active persistent connection. */
 868   char *host;
 869   int port;
 870
 871   /* Whether a ssl handshake has occoured on this connection.  */
 872   bool ssl;
 873
 874   /* Whether the connection was authorized.  This is only done by
 875      NTLM, which authorizes *connections* rather than individual
 876      requests.  (That practice is peculiar for HTTP, but it is a
 877      useful optimization.)  */
 878   bool authorized;
 879
 880 #ifdef ENABLE_NTLM
 881   /* NTLM data of the current connection.  */
 882   struct ntlmdata ntlm;
 883 #endif
 884 } pconn;
 885
 886 /* Mark the persistent connection as invalid and free the resources it
 887    uses.  This is used by the CLOSE_* macros after they forcefully
 888    close a registered persistent connection.  */
 889
 890 static void
 891 invalidate_persistent (void)
 892 {
 893   DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
 894   pconn_active = false;
 895   fd_close (pconn.socket);
 896   xfree (pconn.host);
 897   xzero (pconn);
 898 }
 899
 900 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
 901    persistent.  This will enable someone to use the same connection
 902    later.  In the context of HTTP, this must be called only AFTER the
 903    response has been received and the server has promised that the
 904    connection will remain alive.
 905
 906    If a previous connection was persistent, it is closed. */
 907
 908 static void
 909 register_persistent (const char *host, int port, int fd, bool ssl)
 910 {
 911   if (pconn_active)
 912     {
 913       if (pconn.socket == fd)
 914         {
 915           /* The connection FD is already registered. */
 916           return;
 917         }
 918       else
 919         {
 920           /* The old persistent connection is still active; close it
 921              first.  This situation arises whenever a persistent
 922              connection exists, but we then connect to a different
 923              host, and try to register a persistent connection to that
 924              one.  */
 925           invalidate_persistent ();
 926         }
 927     }
 928
 929   pconn_active = true;
 930   pconn.socket = fd;
 931   pconn.host = xstrdup (host);
 932   pconn.port = port;
 933   pconn.ssl = ssl;
 934   pconn.authorized = false;
 935
 936   DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
 937 }
 938
 939 /* Return true if a persistent connection is available for connecting
 940    to HOST:PORT.  */
 941
 942 static bool
 943 persistent_available_p (const char *host, int port, bool ssl,
 944                         bool *host_lookup_failed)
 945 {
 946   /* First, check whether a persistent connection is active at all.  */
 947   if (!pconn_active)
 948     return false;
 949
 950   /* If we want SSL and the last connection wasn't or vice versa,
 951      don't use it.  Checking for host and port is not enough because
 952      HTTP and HTTPS can apparently coexist on the same port.  */
 953   if (ssl != pconn.ssl)
 954     return false;
 955
 956   /* If we're not connecting to the same port, we're not interested. */
 957   if (port != pconn.port)
 958     return false;
 959
 960   /* If the host is the same, we're in business.  If not, there is
 961      still hope -- read below.  */
 962   if (0 != strcasecmp (host, pconn.host))
 963     {
 964       /* Check if pconn.socket is talking to HOST under another name.
 965          This happens often when both sites are virtual hosts
 966          distinguished only by name and served by the same network
 967          interface, and hence the same web server (possibly set up by
 968          the ISP and serving many different web sites).  This
 969          admittedly unconventional optimization does not contradict
 970          HTTP and works well with popular server software.  */
 971
 972       bool found;
 973       ip_address ip;
 974       struct address_list *al;
 975
 976       if (ssl)
 977         /* Don't try to talk to two different SSL sites over the same
 978            secure connection!  (Besides, it's not clear that
 979            name-based virtual hosting is even possible with SSL.)  */
 980         return false;
 981
 982       /* If pconn.socket's peer is one of the IP addresses HOST
 983          resolves to, pconn.socket is for all intents and purposes
 984          already talking to HOST.  */
 985
 986       if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
 987         {
 988           /* Can't get the peer's address -- something must be very
 989              wrong with the connection.  */
 990           invalidate_persistent ();
 991           return false;
 992         }
 993       al = lookup_host (host, 0);
 994       if (!al)
 995         {
 996           *host_lookup_failed = true;
 997           return false;
 998         }
 999
1000       found = address_list_contains (al, &ip);
1001       address_list_release (al);
1002
1003       if (!found)
1004         return false;
1005
1006       /* The persistent connection's peer address was found among the
1007          addresses HOST resolved to; therefore, pconn.sock is in fact
1008          already talking to HOST -- no need to reconnect.  */
1009     }
1010
1011   /* Finally, check whether the connection is still open.  This is
1012      important because most servers implement liberal (short) timeout
1013      on persistent connections.  Wget can of course always reconnect
1014      if the connection doesn't work out, but it's nicer to know in
1015      advance.  This test is a logical followup of the first test, but
1016      is "expensive" and therefore placed at the end of the list.
1017
1018      (Current implementation of test_socket_open has a nice side
1019      effect that it treats sockets with pending data as "closed".
1020      This is exactly what we want: if a broken server sends message
1021      body in response to HEAD, or if it sends more than conent-length
1022      data, we won't reuse the corrupted connection.)  */
1023
1024   if (!test_socket_open (pconn.socket))
1025     {
1026       /* Oops, the socket is no longer open.  Now that we know that,
1027          let's invalidate the persistent connection before returning
1028          0.  */
1029       invalidate_persistent ();
1030       return false;
1031     }
1032
1033   return true;
1034 }
1035
1036 /* The idea behind these two CLOSE macros is to distinguish between
1037    two cases: one when the job we've been doing is finished, and we
1038    want to close the connection and leave, and two when something is
1039    seriously wrong and we're closing the connection as part of
1040    cleanup.
1041
1042    In case of keep_alive, CLOSE_FINISH should leave the connection
1043    open, while CLOSE_INVALIDATE should still close it.
1044
   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".  */
1050
/* Close FD after a request that finished normally.  Does nothing when
   `keep_alive' is set, so that the connection stays open for reuse.
   Otherwise FD is closed -- through invalidate_persistent when it is
   the registered persistent socket, through fd_close when it is not --
   and FD is reset to -1 in both cases so that the caller is never
   left holding a descriptor that has already been closed.

   FD must be an lvalue.  The macro reads `keep_alive' from the
   enclosing scope.  */
#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      if (pconn_active && (fd) == pconn.socket) \
        invalidate_persistent ();               \
      else                                      \
        fd_close (fd);                          \
      fd = -1;                                  \
    }                                           \
} while (0)
1063
/* Unconditionally close FD as part of error cleanup, regardless of
   `keep_alive'.  If FD is the registered persistent socket, the
   persistent connection is invalidated; any other descriptor is
   closed with fd_close.  FD must be an lvalue; it is always reset to
   -1 afterwards.  */
#define CLOSE_INVALIDATE(fd) do {               \
  if (!pconn_active || (fd) != pconn.socket)    \
    fd_close (fd);                              \
  else                                          \
    invalidate_persistent ();                   \
  fd = -1;                                      \
} while (0)
1071 \f
/* State of a single HTTP retrieval.  gethttp receives a pointer to one
   of these and stores various HTTP parameters of the response in it.
   The string members newloc, remote_time, error and rderrmsg are
   released by free_hstat.  */
struct http_stat
{
  wgint len;                    /* received length; gethttp resets it
                                   to 0 */
  wgint contlen;                /* expected length; gethttp resets it
                                   to -1 */
  wgint restval;                /* the restart value; when non-zero,
                                   gethttp sends it in a "Range:
                                   bytes=N-" header */
  int res;                      /* the result of last read; gethttp
                                   resets it to -1 */
  char *rderrmsg;               /* error message from read error */
  char *newloc;                 /* new location (redirection): copy of
                                   the Location response header */
  char *remote_time;            /* remote time-stamp string: copy of
                                   the Last-Modified response header */
  char *error;                  /* textual HTTP error: copy of the
                                   status-line message, or a
                                   placeholder when there is none */
  int statcode;                 /* status code */
  wgint rd_size;                /* amount of data read from socket */
  double dltime;                /* time it took to download the data */
  const char *referer;          /* value of the referer header. */
  char **local_file;            /* local file; gethttp asserts that
                                   *local_file is non-NULL unless only
                                   a HEAD request is made */
};
1088
1089 static void
1090 free_hstat (struct http_stat *hs)
1091 {
1092   xfree_null (hs->newloc);
1093   xfree_null (hs->remote_time);
1094   xfree_null (hs->error);
1095   xfree_null (hs->rderrmsg);
1096
1097   /* Guard against being called twice. */
1098   hs->newloc = NULL;
1099   hs->remote_time = NULL;
1100   hs->error = NULL;
1101 }
1102
/* Forward declarations of the authentication helpers that gethttp
   calls; their definitions are not in this part of the file.  */
static char *create_authorization_line (const char *, const char *,
                                        const char *, const char *,
                                        const char *, bool *);
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
1108
/* Evaluate to non-zero if the string LINE begins, ignoring case, with
   STRING_CONSTANT, and the match is followed by whitespace or by the
   end of LINE.  STRING_CONSTANT must be a string literal, because its
   length is taken with sizeof.

   LINE is parenthesized at every use so that pointer expressions such
   as `p + 1' can be passed safely.  It is evaluated more than once,
   so it must not have side effects.  */
#define BEGINS_WITH(line, string_constant)                              \
  (!strncasecmp ((line), string_constant, sizeof (string_constant) - 1) \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                   \
       || !(line)[sizeof (string_constant) - 1]))
1113
/* Set the User-Agent header of request REQ according to
   opt.useragent:
     - not set (NULL): send the default "Wget/<version_string>";
     - set to a non-empty string: send that string as is;
     - set to the empty string: do not set the header here.  */
#define SET_USER_AGENT(req) do {                                        \
  if (opt.useragent)                                                    \
    {                                                                   \
      if (*opt.useragent)                                               \
        request_set_header (req, "User-Agent", opt.useragent, rel_none); \
    }                                                                   \
  else                                                                  \
    request_set_header (req, "User-Agent",                              \
                        aprintf ("Wget/%s", version_string), rel_value); \
} while (0)
1121
/* Non-zero when any of the flags that allow clobbering the file
   (opening it with "wb") is set.  Kept in one place so the list of
   flags is not repeated at each use.  #### This will require
   rework.  */
#define ALLOW_CLOBBER                           \
  (opt.noclobber                                \
   || opt.always_rest                           \
   || opt.timestamping                          \
   || opt.dirstruct                             \
   || opt.output_document)
1127
1128 /* Retrieve a document through HTTP protocol.  It recognizes status
1129    code, and correctly handles redirections.  It closes the network
1130    socket.  If it receives an error from the functions below it, it
1131    will print it if there is enough information to do so (almost
1132    always), returning the error to the caller (i.e. http_loop).
1133
1134    Various HTTP parameters are stored to hs.
1135
1136    If PROXY is non-NULL, the connection will be made to the proxy
1137    server, and u->url will be requested.  */
1138 static uerr_t
1139 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1140 {
1141   struct request *req;
1142
1143   char *type;
1144   char *user, *passwd;
1145   char *proxyauth;
1146   int statcode;
1147   int write_error;
1148   wgint contlen, contrange;
1149   struct url *conn;
1150   FILE *fp;
1151
1152   int sock = -1;
1153   int flags;
1154
1155   /* Set to 1 when the authorization has failed permanently and should
1156      not be tried again. */
1157   bool auth_finished = false;
1158
1159   /* Whether NTLM authentication is used for this request. */
1160   bool ntlm_seen = false;
1161
1162   /* Whether our connection to the remote host is through SSL.  */
1163   bool using_ssl = false;
1164
1165   /* Whether a HEAD request will be issued (as opposed to GET or
1166      POST). */
1167   bool head_only = !!(*dt & HEAD_ONLY);
1168
1169   char *head;
1170   struct response *resp;
1171   char hdrval[256];
1172   char *message;
1173
1174   /* Whether this connection will be kept alive after the HTTP request
1175      is done. */
1176   bool keep_alive;
1177
1178   /* Whether keep-alive should be inhibited.
1179
1180      RFC 2068 requests that 1.0 clients not send keep-alive requests
1181      to proxies.  This is because many 1.0 proxies do not interpret
1182      the Connection header and transfer it to the remote server,
1183      causing it to not close the connection and leave both the proxy
1184      and the client hanging.  */
1185   bool inhibit_keep_alive =
1186     !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1187
1188   /* Headers sent when using POST. */
1189   wgint post_data_size = 0;
1190
1191   bool host_lookup_failed = false;
1192
1193 #ifdef HAVE_SSL
1194   if (u->scheme == SCHEME_HTTPS)
1195     {
1196       /* Initialize the SSL context.  After this has once been done,
1197          it becomes a no-op.  */
1198       if (!ssl_init ())
1199         {
1200           scheme_disable (SCHEME_HTTPS);
1201           logprintf (LOG_NOTQUIET,
1202                      _("Disabling SSL due to encountered errors.\n"));
1203           return SSLINITFAILED;
1204         }
1205     }
1206 #endif /* HAVE_SSL */
1207
1208   if (!head_only)
1209     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1210        know the local filename so we can save to it. */
1211     assert (*hs->local_file != NULL);
1212
1213   /* Initialize certain elements of struct http_stat.  */
1214   hs->len = 0;
1215   hs->contlen = -1;
1216   hs->res = -1;
1217   hs->rderrmsg = NULL;
1218   hs->newloc = NULL;
1219   hs->remote_time = NULL;
1220   hs->error = NULL;
1221
1222   conn = u;
1223
1224   /* Prepare the request to send. */
1225
1226   req = request_new ();
1227   {
1228     char *meth_arg;
1229     const char *meth = "GET";
1230     if (head_only)
1231       meth = "HEAD";
1232     else if (opt.post_file_name || opt.post_data)
1233       meth = "POST";
1234     /* Use the full path, i.e. one that includes the leading slash and
1235        the query string.  E.g. if u->path is "foo/bar" and u->query is
1236        "param=value", full_path will be "/foo/bar?param=value".  */
1237     if (proxy
1238 #ifdef HAVE_SSL
1239         /* When using SSL over proxy, CONNECT establishes a direct
1240            connection to the HTTPS server.  Therefore use the same
1241            argument as when talking to the server directly. */
1242         && u->scheme != SCHEME_HTTPS
1243 #endif
1244         )
1245       meth_arg = xstrdup (u->url);
1246     else
1247       meth_arg = url_full_path (u);
1248     request_set_method (req, meth, meth_arg);
1249   }
1250
1251   request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1252   if (*dt & SEND_NOCACHE)
1253     request_set_header (req, "Pragma", "no-cache", rel_none);
1254   if (hs->restval)
1255     request_set_header (req, "Range",
1256                         aprintf ("bytes=%s-",
1257                                  number_to_static_string (hs->restval)),
1258                         rel_value);
1259   SET_USER_AGENT (req);
1260   request_set_header (req, "Accept", "*/*", rel_none);
1261
1262   /* Find the username and password for authentication. */
1263   user = u->user;
1264   passwd = u->passwd;
1265   search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1266   user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1267   passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1268
1269   if (user && passwd)
1270     {
1271       /* We have the username and the password, but haven't tried
1272          any authorization yet.  Let's see if the "Basic" method
1273          works.  If not, we'll come back here and construct a
1274          proper authorization method with the right challenges.
1275
1276          If we didn't employ this kind of logic, every URL that
1277          requires authorization would have to be processed twice,
1278          which is very suboptimal and generates a bunch of false
1279          "unauthorized" errors in the server log.
1280
1281          #### But this logic also has a serious problem when used
1282          with stronger authentications: we *first* transmit the
1283          username and the password in clear text, and *then* attempt a
1284          stronger authentication scheme.  That cannot be right!  We
1285          are only fortunate that almost everyone still uses the
1286          `Basic' scheme anyway.
1287
1288          There should be an option to prevent this from happening, for
1289          those who use strong authentication schemes and value their
1290          passwords.  */
1291       request_set_header (req, "Authorization",
1292                           basic_authentication_encode (user, passwd),
1293                           rel_value);
1294     }
1295
1296   proxyauth = NULL;
1297   if (proxy)
1298     {
1299       char *proxy_user, *proxy_passwd;
1300       /* For normal username and password, URL components override
1301          command-line/wgetrc parameters.  With proxy
1302          authentication, it's the reverse, because proxy URLs are
1303          normally the "permanent" ones, so command-line args
1304          should take precedence.  */
1305       if (opt.proxy_user && opt.proxy_passwd)
1306         {
1307           proxy_user = opt.proxy_user;
1308           proxy_passwd = opt.proxy_passwd;
1309         }
1310       else
1311         {
1312           proxy_user = proxy->user;
1313           proxy_passwd = proxy->passwd;
1314         }
1315       /* #### This does not appear right.  Can't the proxy request,
1316          say, `Digest' authentication?  */
1317       if (proxy_user && proxy_passwd)
1318         proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1319
1320       /* If we're using a proxy, we will be connecting to the proxy
1321          server.  */
1322       conn = proxy;
1323
1324       /* Proxy authorization over SSL is handled below. */
1325 #ifdef HAVE_SSL
1326       if (u->scheme != SCHEME_HTTPS)
1327 #endif
1328         request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1329     }
1330
1331   /* Generate the Host header, HOST:PORT.  Take into account that:
1332
1333      - Broken server-side software often doesn't recognize the PORT
1334        argument, so we must generate "Host: www.server.com" instead of
1335        "Host: www.server.com:80" (and likewise for https port).
1336
1337      - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
1338        becomes ambiguous and needs to be rewritten as "Host:
1339        [3ffe:8100:200:2::2]:1234".  */
1340   {
1341     /* Formats arranged for hfmt[add_port][add_squares].  */
1342     static const char *hfmt[][2] = {
1343       { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
1344     };
1345     int add_port = u->port != scheme_default_port (u->scheme);
1346     int add_squares = strchr (u->host, ':') != NULL;
1347     request_set_header (req, "Host",
1348                         aprintf (hfmt[add_port][add_squares], u->host, u->port),
1349                         rel_value);
1350   }
1351
1352   if (!inhibit_keep_alive)
1353     request_set_header (req, "Connection", "Keep-Alive", rel_none);
1354
1355   if (opt.cookies)
1356     request_set_header (req, "Cookie",
1357                         cookie_header (wget_cookie_jar,
1358                                        u->host, u->port, u->path,
1359 #ifdef HAVE_SSL
1360                                        u->scheme == SCHEME_HTTPS
1361 #else
1362                                        0
1363 #endif
1364                                        ),
1365                         rel_value);
1366
1367   if (opt.post_data || opt.post_file_name)
1368     {
1369       request_set_header (req, "Content-Type",
1370                           "application/x-www-form-urlencoded", rel_none);
1371       if (opt.post_data)
1372         post_data_size = strlen (opt.post_data);
1373       else
1374         {
1375           post_data_size = file_size (opt.post_file_name);
1376           if (post_data_size == -1)
1377             {
1378               logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
1379                          opt.post_file_name, strerror (errno));
1380               post_data_size = 0;
1381             }
1382         }
1383       request_set_header (req, "Content-Length",
1384                           xstrdup (number_to_static_string (post_data_size)),
1385                           rel_value);
1386     }
1387
1388   /* Add the user headers. */
1389   if (opt.user_headers)
1390     {
1391       int i;
1392       for (i = 0; opt.user_headers[i]; i++)
1393         request_set_user_header (req, opt.user_headers[i]);
1394     }
1395
1396  retry_with_auth:
1397   /* We need to come back here when the initial attempt to retrieve
1398      without authorization header fails.  (Expected to happen at least
1399      for the Digest authorization scheme.)  */
1400
1401   keep_alive = false;
1402
1403   /* Establish the connection.  */
1404
1405   if (!inhibit_keep_alive)
1406     {
1407       /* Look for a persistent connection to target host, unless a
1408          proxy is used.  The exception is when SSL is in use, in which
1409          case the proxy is nothing but a passthrough to the target
1410          host, registered as a connection to the latter.  */
1411       struct url *relevant = conn;
1412 #ifdef HAVE_SSL
1413       if (u->scheme == SCHEME_HTTPS)
1414         relevant = u;
1415 #endif
1416
1417       if (persistent_available_p (relevant->host, relevant->port,
1418 #ifdef HAVE_SSL
1419                                   relevant->scheme == SCHEME_HTTPS,
1420 #else
1421                                   0,
1422 #endif
1423                                   &host_lookup_failed))
1424         {
1425           sock = pconn.socket;
1426           using_ssl = pconn.ssl;
1427           logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1428                      escnonprint (pconn.host), pconn.port);
1429           DEBUGP (("Reusing fd %d.\n", sock));
1430           if (pconn.authorized)
1431             /* If the connection is already authorized, the "Basic"
1432                authorization added by code above is unnecessary and
1433                only hurts us.  */
1434             request_remove_header (req, "Authorization");
1435         }
1436     }
1437
1438   if (sock < 0)
1439     {
1440       /* In its current implementation, persistent_available_p will
1441          look up conn->host in some cases.  If that lookup failed, we
1442          don't need to bother with connect_to_host.  */
1443       if (host_lookup_failed)
1444         {
1445           request_free (req);
1446           return HOSTERR;
1447         }
1448
1449       sock = connect_to_host (conn->host, conn->port);
1450       if (sock == E_HOST)
1451         {
1452           request_free (req);
1453           return HOSTERR;
1454         }
1455       else if (sock < 0)
1456         {
1457           request_free (req);
1458           return (retryable_socket_connect_error (errno)
1459                   ? CONERROR : CONIMPOSSIBLE);
1460         }
1461
1462 #ifdef HAVE_SSL
1463       if (proxy && u->scheme == SCHEME_HTTPS)
1464         {
1465           /* When requesting SSL URLs through proxies, use the
1466              CONNECT method to request passthrough.  */
1467           struct request *connreq = request_new ();
1468           request_set_method (connreq, "CONNECT",
1469                               aprintf ("%s:%d", u->host, u->port));
1470           SET_USER_AGENT (connreq);
1471           if (proxyauth)
1472             {
1473               request_set_header (connreq, "Proxy-Authorization",
1474                                   proxyauth, rel_value);
1475               /* Now that PROXYAUTH is part of the CONNECT request,
1476                  zero it out so we don't send proxy authorization with
1477                  the regular request below.  */
1478               proxyauth = NULL;
1479             }
1480           /* Examples in rfc2817 use the Host header in CONNECT
1481              requests.  I don't see how that gains anything, given
1482              that the contents of Host would be exactly the same as
1483              the contents of CONNECT.  */
1484
1485           write_error = request_send (connreq, sock);
1486           request_free (connreq);
1487           if (write_error < 0)
1488             {
1489               CLOSE_INVALIDATE (sock);
1490               return WRITEFAILED;
1491             }
1492
1493           head = read_http_response_head (sock);
1494           if (!head)
1495             {
1496               logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1497                          fd_errstr (sock));
1498               CLOSE_INVALIDATE (sock);
1499               return HERR;
1500             }
1501           message = NULL;
1502           if (!*head)
1503             {
1504               xfree (head);
1505               goto failed_tunnel;
1506             }
1507           DEBUGP (("proxy responded with: [%s]\n", head));
1508
1509           resp = resp_new (head);
1510           statcode = resp_status (resp, &message);
1511           resp_free (resp);
1512           xfree (head);
1513           if (statcode != 200)
1514             {
1515             failed_tunnel:
1516               logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1517                          message ? escnonprint (message) : "?");
1518               xfree_null (message);
1519               return CONSSLERR;
1520             }
1521           xfree_null (message);
1522
1523           /* SOCK is now *really* connected to u->host, so update CONN
1524              to reflect this.  That way register_persistent will
1525              register SOCK as being connected to u->host:u->port.  */
1526           conn = u;
1527         }
1528
1529       if (conn->scheme == SCHEME_HTTPS)
1530         {
1531           if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1532             {
1533               fd_close (sock);
1534               return CONSSLERR;
1535             }
1536           using_ssl = true;
1537         }
1538 #endif /* HAVE_SSL */
1539     }
1540
1541   /* Send the request to server.  */
1542   write_error = request_send (req, sock);
1543
1544   if (write_error >= 0)
1545     {
1546       if (opt.post_data)
1547         {
1548           DEBUGP (("[POST data: %s]\n", opt.post_data));
1549           write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1550         }
1551       else if (opt.post_file_name && post_data_size != 0)
1552         write_error = post_file (sock, opt.post_file_name, post_data_size);
1553     }
1554
1555   if (write_error < 0)
1556     {
1557       CLOSE_INVALIDATE (sock);
1558       request_free (req);
1559       return WRITEFAILED;
1560     }
1561   logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1562              proxy ? "Proxy" : "HTTP");
1563   contlen = -1;
1564   contrange = 0;
1565   *dt &= ~RETROKF;
1566
1567   head = read_http_response_head (sock);
1568   if (!head)
1569     {
1570       if (errno == 0)
1571         {
1572           logputs (LOG_NOTQUIET, _("No data received.\n"));
1573           CLOSE_INVALIDATE (sock);
1574           request_free (req);
1575           return HEOF;
1576         }
1577       else
1578         {
1579           logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1580                      fd_errstr (sock));
1581           CLOSE_INVALIDATE (sock);
1582           request_free (req);
1583           return HERR;
1584         }
1585     }
1586   DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1587
1588   resp = resp_new (head);
1589
1590   /* Check for status line.  */
1591   message = NULL;
1592   statcode = resp_status (resp, &message);
1593   if (!opt.server_response)
1594     logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1595                message ? escnonprint (message) : "");
1596   else
1597     {
1598       logprintf (LOG_VERBOSE, "\n");
1599       print_server_response (resp, "  ");
1600     }
1601
1602   if (!opt.ignore_length
1603       && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1604     {
1605       wgint parsed;
1606       errno = 0;
1607       parsed = str_to_wgint (hdrval, NULL, 10);
1608       if (parsed == WGINT_MAX && errno == ERANGE)
1609         /* Out of range.
1610            #### If Content-Length is out of range, it most likely
1611            means that the file is larger than 2G and that we're
1612            compiled without LFS.  In that case we should probably
1613            refuse to even attempt to download the file.  */
1614         contlen = -1;
1615       else
1616         contlen = parsed;
1617     }
1618
1619   /* Check for keep-alive related responses. */
1620   if (!inhibit_keep_alive && contlen != -1)
1621     {
1622       if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1623         keep_alive = true;
1624       else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1625         {
1626           if (0 == strcasecmp (hdrval, "Keep-Alive"))
1627             keep_alive = true;
1628         }
1629     }
1630   if (keep_alive)
1631     /* The server has promised that it will not close the connection
1632        when we're done.  This means that we can register it.  */
1633     register_persistent (conn->host, conn->port, sock, using_ssl);
1634
1635   if (statcode == HTTP_STATUS_UNAUTHORIZED)
1636     {
1637       /* Authorization is required.  */
1638       if (keep_alive && !head_only && skip_short_body (sock, contlen))
1639         CLOSE_FINISH (sock);
1640       else
1641         CLOSE_INVALIDATE (sock);
1642       pconn.authorized = false;
1643       if (!auth_finished && (user && passwd))
1644         {
1645           /* IIS sends multiple copies of WWW-Authenticate, one with
1646              the value "negotiate", and other(s) with data.  Loop over
1647              all the occurrences and pick the one we recognize.  */
1648           int wapos;
1649           const char *wabeg, *waend;
1650           char *www_authenticate = NULL;
1651           for (wapos = 0;
1652                (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1653                                             &wabeg, &waend)) != -1;
1654                ++wapos)
1655             if (known_authentication_scheme_p (wabeg, waend))
1656               {
1657                 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1658                 break;
1659               }
1660
1661           if (!www_authenticate)
1662             /* If the authentication header is missing or
1663                unrecognized, there's no sense in retrying.  */
1664             logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1665           else if (BEGINS_WITH (www_authenticate, "Basic"))
1666             /* If the authentication scheme is "Basic", which we send
1667                by default, there's no sense in retrying either.  (This
1668                should be changed when we stop sending "Basic" data by
1669                default.)  */
1670             ;
1671           else
1672             {
1673               char *pth;
1674               pth = url_full_path (u);
1675               request_set_header (req, "Authorization",
1676                                   create_authorization_line (www_authenticate,
1677                                                              user, passwd,
1678                                                              request_method (req),
1679                                                              pth,
1680                                                              &auth_finished),
1681                                   rel_value);
1682               if (BEGINS_WITH (www_authenticate, "NTLM"))
1683                 ntlm_seen = true;
1684               xfree (pth);
1685               goto retry_with_auth;
1686             }
1687         }
1688       logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1689       request_free (req);
1690       return AUTHFAILED;
1691     }
1692   else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1693     {
1694       /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1695       if (ntlm_seen)
1696         pconn.authorized = true;
1697     }
1698   request_free (req);
1699
1700   hs->statcode = statcode;
1701   if (statcode == -1)
1702     hs->error = xstrdup (_("Malformed status line"));
1703   else if (!*message)
1704     hs->error = xstrdup (_("(no description)"));
1705   else
1706     hs->error = xstrdup (message);
1707   xfree_null (message);
1708
1709   type = resp_header_strdup (resp, "Content-Type");
1710   if (type)
1711     {
1712       char *tmp = strchr (type, ';');
1713       if (tmp)
1714         {
1715           while (tmp > type && ISSPACE (tmp[-1]))
1716             --tmp;
1717           *tmp = '\0';
1718         }
1719     }
1720   hs->newloc = resp_header_strdup (resp, "Location");
1721   hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1722
1723   /* Handle (possibly multiple instances of) the Set-Cookie header. */
1724   if (opt.cookies)
1725     {
1726       int scpos;
1727       const char *scbeg, *scend;
1728       /* The jar should have been created by now. */
1729       assert (wget_cookie_jar != NULL);
1730       for (scpos = 0;
1731            (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1732                                         &scbeg, &scend)) != -1;
1733            ++scpos)
1734         {
1735           char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1736           cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
1737                                     u->path, set_cookie);
1738         }
1739     }
1740
1741   if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1742     {
1743       wgint first_byte_pos, last_byte_pos, entity_length;
1744       if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1745                                &entity_length))
1746         contrange = first_byte_pos;
1747     }
1748   resp_free (resp);
1749
1750   /* 20x responses are counted among successful by default.  */
1751   if (H_20X (statcode))
1752     *dt |= RETROKF;
1753
1754   /* Return if redirected.  */
1755   if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1756     {
1757       /* RFC2068 says that in case of the 300 (multiple choices)
1758          response, the server can output a preferred URL through
1759          `Location' header; otherwise, the request should be treated
1760          like GET.  So, if the location is set, it will be a
1761          redirection; otherwise, just proceed normally.  */
1762       if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1763         *dt |= RETROKF;
1764       else
1765         {
1766           logprintf (LOG_VERBOSE,
1767                      _("Location: %s%s\n"),
1768                      hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
1769                      hs->newloc ? _(" [following]") : "");
1770           if (keep_alive && !head_only && skip_short_body (sock, contlen))
1771             CLOSE_FINISH (sock);
1772           else
1773             CLOSE_INVALIDATE (sock);
1774           xfree_null (type);
1775           return NEWLOCATION;
1776         }
1777     }
1778
1779   /* If content-type is not given, assume text/html.  This is because
1780      of the multitude of broken CGI's that "forget" to generate the
1781      content-type.  */
1782   if (!type ||
1783         0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1784         0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1785     *dt |= TEXTHTML;
1786   else
1787     *dt &= ~TEXTHTML;
1788
1789   if (opt.html_extension && (*dt & TEXTHTML))
1790     /* -E / --html-extension / html_extension = on was specified, and this is a
1791        text/html file.  If some case-insensitive variation on ".htm[l]" isn't
1792        already the file's suffix, tack on ".html". */
1793     {
1794       char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
1795
1796       if (last_period_in_local_filename == NULL
1797           || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1798                || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1799         {
1800           int local_filename_len = strlen (*hs->local_file);
1801           /* Resize the local file, allowing for ".html" preceded by
1802              optional ".NUMBER".  */
1803           *hs->local_file = xrealloc (*hs->local_file,
1804                                       local_filename_len + 24 + sizeof (".html"));
1805           strcpy(*hs->local_file + local_filename_len, ".html");
1806           /* If clobbering is not allowed and the file, as named,
1807              exists, tack on ".NUMBER.html" instead. */
1808           if (!ALLOW_CLOBBER)
1809             {
1810               int ext_num = 1;
1811               do
1812                 sprintf (*hs->local_file + local_filename_len,
1813                          ".%d.html", ext_num++);
1814               while (file_exists_p (*hs->local_file));
1815             }
1816           *dt |= ADDED_HTML_EXTENSION;
1817         }
1818     }
1819
1820   if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
1821     {
1822       /* If `-c' is in use and the file has been fully downloaded (or
1823          the remote file has shrunk), Wget effectively requests bytes
1824          after the end of file and the server response with 416.  */
1825       logputs (LOG_VERBOSE, _("\
1826 \n    The file is already fully retrieved; nothing to do.\n\n"));
1827       /* In case the caller inspects. */
1828       hs->len = contlen;
1829       hs->res = 0;
1830       /* Mark as successfully retrieved. */
1831       *dt |= RETROKF;
1832       xfree_null (type);
1833       CLOSE_INVALIDATE (sock);  /* would be CLOSE_FINISH, but there
1834                                    might be more bytes in the body. */
1835       return RETRUNNEEDED;
1836     }
1837   if ((contrange != 0 && contrange != hs->restval)
1838       || (H_PARTIAL (statcode) && !contrange))
1839     {
1840       /* The Range request was somehow misunderstood by the server.
1841          Bail out.  */
1842       xfree_null (type);
1843       CLOSE_INVALIDATE (sock);
1844       return RANGEERR;
1845     }
1846   hs->contlen = contlen + contrange;
1847
1848   if (opt.verbose)
1849     {
1850       if (*dt & RETROKF)
1851         {
1852           /* No need to print this output if the body won't be
1853              downloaded at all, or if the original server response is
1854              printed.  */
1855           logputs (LOG_VERBOSE, _("Length: "));
1856           if (contlen != -1)
1857             {
1858               logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
1859               if (contlen + contrange >= 1024)
1860                 logprintf (LOG_VERBOSE, " (%s)",
1861                            human_readable (contlen + contrange));
1862               if (contrange)
1863                 {
1864                   if (contlen >= 1024)
1865                     logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
1866                                number_to_static_string (contlen),
1867                                human_readable (contlen));
1868                   else
1869                     logprintf (LOG_VERBOSE, _(", %s remaining"),
1870                                number_to_static_string (contlen));
1871                 }
1872             }
1873           else
1874             logputs (LOG_VERBOSE,
1875                      opt.ignore_length ? _("ignored") : _("unspecified"));
1876           if (type)
1877             logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
1878           else
1879             logputs (LOG_VERBOSE, "\n");
1880         }
1881     }
1882   xfree_null (type);
1883   type = NULL;                  /* We don't need it any more.  */
1884
1885   /* Return if we have no intention of further downloading.  */
1886   if (!(*dt & RETROKF) || head_only)
1887     {
1888       /* In case the caller cares to look...  */
1889       hs->len = 0;
1890       hs->res = 0;
1891       xfree_null (type);
1892       if (head_only)
1893         /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
1894            servers not to send body in response to a HEAD request, and
1895            those that do will likely be caught by test_socket_open.
1896            If not, they can be worked around using
1897            `--no-http-keep-alive'.  */
1898         CLOSE_FINISH (sock);
1899       else if (keep_alive && skip_short_body (sock, contlen))
1900         /* Successfully skipped the body; also keep using the socket. */
1901         CLOSE_FINISH (sock);
1902       else
1903         CLOSE_INVALIDATE (sock);
1904       return RETRFINISHED;
1905     }
1906
1907   /* Open the local file.  */
1908   if (!output_stream)
1909     {
1910       mkalldirs (*hs->local_file);
1911       if (opt.backups)
1912         rotate_backups (*hs->local_file);
1913       if (hs->restval)
1914         fp = fopen (*hs->local_file, "ab");
1915       else if (ALLOW_CLOBBER)
1916         fp = fopen (*hs->local_file, "wb");
1917       else
1918         {
1919           fp = fopen_excl (*hs->local_file, true);
1920           if (!fp && errno == EEXIST)
1921             {
1922               /* We cannot just invent a new name and use it (which is
1923                  what functions like unique_create typically do)
1924                  because we told the user we'd use this name.
1925                  Instead, return and retry the download.  */
1926               logprintf (LOG_NOTQUIET,
1927                          _("%s has sprung into existence.\n"),
1928                          *hs->local_file);
1929               CLOSE_INVALIDATE (sock);
1930               return FOPEN_EXCL_ERR;
1931             }
1932         }
1933       if (!fp)
1934         {
1935           logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1936           CLOSE_INVALIDATE (sock);
1937           return FOPENERR;
1938         }
1939     }
1940   else
1941     fp = output_stream;
1942
1943   /* #### This confuses the timestamping code that checks for file
1944      size.  Maybe we should save some additional information?  */
1945   if (opt.save_headers)
1946     fwrite (head, 1, strlen (head), fp);
1947
1948   /* Now we no longer need to store the response header. */
1949   xfree (head);
1950
1951   /* Download the request body.  */
1952   flags = 0;
1953   if (contlen != -1)
1954     /* If content-length is present, read that much; otherwise, read
1955        until EOF.  The HTTP spec doesn't require the server to
1956        actually close the connection when it's done sending data. */
1957     flags |= rb_read_exactly;
1958   if (hs->restval > 0 && contrange == 0)
1959     /* If the server ignored our range request, instruct fd_read_body
1960        to skip the first RESTVAL bytes of body.  */
1961     flags |= rb_skip_startpos;
1962   hs->len = hs->restval;
1963   hs->rd_size = 0;
1964   hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
1965                           hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
1966                           flags);
1967
1968   if (hs->res >= 0)
1969     CLOSE_FINISH (sock);
1970   else
1971     {
1972       if (hs->res < 0)
1973         hs->rderrmsg = xstrdup (fd_errstr (sock));
1974       CLOSE_INVALIDATE (sock);
1975     }
1976
1977   if (!output_stream)
1978     fclose (fp);
1979   if (hs->res == -2)
1980     return FWRITEERR;
1981   return RETRFINISHED;
1982 }
1983
1984 /* The genuine HTTP loop!  This is the part where the retrieval is
1985    retried, and retried, and retried, and...  */
1986 uerr_t
1987 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1988            int *dt, struct url *proxy)
1989 {
1990   int count;
1991   bool use_ts, got_head = false;/* time-stamping info */
1992   char *filename_plus_orig_suffix;
1993   char *local_filename = NULL;
1994   char *tms, *locf;
1995   const char *tmrate;
1996   uerr_t err;
1997   time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
1998   wgint local_size = 0;         /* the size of the local file */
1999   size_t filename_len;
2000   struct http_stat hstat;       /* HTTP status */
2001   struct_stat st;
2002   char *dummy = NULL;
2003
2004   /* This used to be done in main(), but it's a better idea to do it
2005      here so that we don't go through the hoops if we're just using
2006      FTP or whatever. */
2007   if (opt.cookies)
2008     {
2009       if (!wget_cookie_jar)
2010         wget_cookie_jar = cookie_jar_new ();
2011       if (opt.cookies_input && !cookies_loaded_p)
2012         {
2013           cookie_jar_load (wget_cookie_jar, opt.cookies_input);
2014           cookies_loaded_p = true;
2015         }
2016     }
2017
2018   *newloc = NULL;
2019
2020   /* Warn on (likely bogus) wildcard usage in HTTP.  */
2021   if (opt.ftp_glob && has_wildcards_p (u->path))
2022     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2023
2024   xzero (hstat);
2025
2026   /* Determine the local filename.  */
2027   if (local_file && *local_file)
2028     hstat.local_file = local_file;
2029   else if (local_file && !opt.output_document)
2030     {
2031       *local_file = url_file_name (u);
2032       hstat.local_file = local_file;
2033     }
2034   else
2035     {
2036       dummy = url_file_name (u);
2037       hstat.local_file = &dummy;
2038       /* be honest about where we will save the file */
2039       if (local_file && opt.output_document)
2040         *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2041     }
2042
2043   if (!opt.output_document)
2044     locf = *hstat.local_file;
2045   else
2046     locf = opt.output_document;
2047
2048   hstat.referer = referer;
2049
2050   filename_len = strlen (*hstat.local_file);
2051   filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
2052
2053   if (opt.noclobber && file_exists_p (*hstat.local_file))
2054     {
2055       /* If opt.noclobber is turned on and file already exists, do not
2056          retrieve the file */
2057       logprintf (LOG_VERBOSE, _("\
2058 File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
2059       /* If the file is there, we suppose it's retrieved OK.  */
2060       *dt |= RETROKF;
2061
2062       /* #### Bogusness alert.  */
2063       /* If its suffix is "html" or "htm" or similar, assume text/html.  */
2064       if (has_html_suffix_p (*hstat.local_file))
2065         *dt |= TEXTHTML;
2066
2067       xfree_null (dummy);
2068       return RETROK;
2069     }
2070
2071   use_ts = false;
2072   if (opt.timestamping)
2073     {
2074       bool local_dot_orig_file_exists = false;
2075
2076       if (opt.backup_converted)
2077         /* If -K is specified, we'll act on the assumption that it was specified
2078            last time these files were downloaded as well, and instead of just
2079            comparing local file X against server file X, we'll compare local
2080            file X.orig (if extant, else X) against server file X.  If -K
2081            _wasn't_ specified last time, or the server contains files called
2082            *.orig, -N will be back to not operating correctly with -k. */
2083         {
2084           /* Would a single s[n]printf() call be faster?  --dan
2085
2086              Definitely not.  sprintf() is horribly slow.  It's a
2087              different question whether the difference between the two
2088              affects a program.  Usually I'd say "no", but at one
2089              point I profiled Wget, and found that a measurable and
2090              non-negligible amount of time was lost calling sprintf()
2091              in url.c.  Replacing sprintf with inline calls to
2092              strcpy() and number_to_string() made a difference.
2093              --hniksic */
2094           memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
2095           memcpy (filename_plus_orig_suffix + filename_len,
2096                   ".orig", sizeof (".orig"));
2097
2098           /* Try to stat() the .orig file. */
2099           if (stat (filename_plus_orig_suffix, &st) == 0)
2100             {
2101               local_dot_orig_file_exists = 1;
2102               local_filename = filename_plus_orig_suffix;
2103             }
2104         }
2105
2106       if (!local_dot_orig_file_exists)
2107         /* Couldn't stat() <file>.orig, so try to stat() <file>. */
2108         if (stat (*hstat.local_file, &st) == 0)
2109           local_filename = *hstat.local_file;
2110
2111       if (local_filename != NULL)
2112         /* There was a local file, so we'll check later to see if the version
2113            the server has is the same version we already have, allowing us to
2114            skip a download. */
2115         {
2116           use_ts = true;
2117           tml = st.st_mtime;
2118 #ifdef WINDOWS
2119           /* Modification time granularity is 2 seconds for Windows, so
2120              increase local time by 1 second for later comparison. */
2121           tml++;
2122 #endif
2123           local_size = st.st_size;
2124           got_head = false;
2125         }
2126     }
2127   /* Reset the counter.  */
2128   count = 0;
2129   *dt = 0;
2130   /* THE loop */
2131   do
2132     {
2133       /* Increment the pass counter.  */
2134       ++count;
2135       sleep_between_retrievals (count);
2136       /* Get the current time string.  */
2137       tms = time_str (NULL);
2138       /* Print fetch message, if opt.verbose.  */
2139       if (opt.verbose)
2140         {
2141           char *hurl = url_string (u, true);
2142           char tmp[256];
2143           strcpy (tmp, "        ");
2144           if (count > 1)
2145             sprintf (tmp, _("(try:%2d)"), count);
2146           logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
2147                      tms, hurl, tmp, locf);
2148 #ifdef WINDOWS
2149           ws_changetitle (hurl);
2150 #endif
2151           xfree (hurl);
2152         }
2153
2154       /* Default document type is empty.  However, if spider mode is
2155          on or time-stamping is employed, HEAD_ONLY commands is
2156          encoded within *dt.  */
2157       if (opt.spider || (use_ts && !got_head))
2158         *dt |= HEAD_ONLY;
2159       else
2160         *dt &= ~HEAD_ONLY;
2161
2162       /* Decide whether or not to restart.  */
2163       if (opt.always_rest
2164           && stat (locf, &st) == 0
2165           && S_ISREG (st.st_mode))
2166         /* When -c is used, continue from on-disk size.  (Can't use
2167            hstat.len even if count>1 because we don't want a failed
2168            first attempt to clobber existing data.)  */
2169         hstat.restval = st.st_size;
2170       else if (count > 1)
2171         /* otherwise, continue where the previous try left off */
2172         hstat.restval = hstat.len;
2173       else
2174         hstat.restval = 0;
2175
2176       /* Decide whether to send the no-cache directive.  We send it in
2177          two cases:
2178            a) we're using a proxy, and we're past our first retrieval.
2179               Some proxies are notorious for caching incomplete data, so
2180               we require a fresh get.
2181            b) caching is explicitly inhibited. */
2182       if ((proxy && count > 1)  /* a */
2183           || !opt.allow_cache   /* b */
2184           )
2185         *dt |= SEND_NOCACHE;
2186       else
2187         *dt &= ~SEND_NOCACHE;
2188
2189       /* Try fetching the document, or at least its head.  */
2190       err = gethttp (u, &hstat, dt, proxy);
2191
2192       /* It's unfortunate that wget determines the local filename before finding
2193          out the Content-Type of the file.  Barring a major restructuring of the
2194          code, we need to re-set locf here, since gethttp() may have xrealloc()d
2195          *hstat.local_file to tack on ".html". */
2196       if (!opt.output_document)
2197         locf = *hstat.local_file;
2198
2199       /* Time?  */
2200       tms = time_str (NULL);
2201       /* Get the new location (with or without the redirection).  */
2202       if (hstat.newloc)
2203         *newloc = xstrdup (hstat.newloc);
2204       switch (err)
2205         {
2206         case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2207         case CONERROR: case READERR: case WRITEFAILED:
2208         case RANGEERR: case FOPEN_EXCL_ERR:
2209           /* Non-fatal errors continue executing the loop, which will
2210              bring them to "while" statement at the end, to judge
2211              whether the number of tries was exceeded.  */
2212           free_hstat (&hstat);
2213           printwhat (count, opt.ntry);
2214           if (err == FOPEN_EXCL_ERR)
2215             {
2216               /* Re-determine the file name. */
2217               if (local_file && *local_file)
2218                 {
2219                   xfree (*local_file);
2220                   *local_file = url_file_name (u);
2221                   hstat.local_file = local_file;
2222                 }
2223               else
2224                 {
2225                   xfree (dummy);
2226                   dummy = url_file_name (u);
2227                   hstat.local_file = &dummy;
2228                 }
2229               /* be honest about where we will save the file */
2230               if (local_file && opt.output_document)
2231                 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2232               if (!opt.output_document)
2233                 locf = *hstat.local_file;
2234               else
2235                 locf = opt.output_document;
2236             }
2237           continue;
2238         case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2239         case SSLINITFAILED: case CONTNOTSUPPORTED:
2240           /* Fatal errors just return from the function.  */
2241           free_hstat (&hstat);
2242           xfree_null (dummy);
2243           return err;
2244         case FWRITEERR: case FOPENERR:
2245           /* Another fatal error.  */
2246           logputs (LOG_VERBOSE, "\n");
2247           logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2248                      *hstat.local_file, strerror (errno));
2249           free_hstat (&hstat);
2250           xfree_null (dummy);
2251           return err;
2252         case CONSSLERR:
2253           /* Another fatal error.  */
2254           logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2255           free_hstat (&hstat);
2256           xfree_null (dummy);
2257           return err;
2258         case NEWLOCATION:
2259           /* Return the new location to the caller.  */
2260           if (!hstat.newloc)
2261             {
2262               logprintf (LOG_NOTQUIET,
2263                          _("ERROR: Redirection (%d) without location.\n"),
2264                          hstat.statcode);
2265               free_hstat (&hstat);
2266               xfree_null (dummy);
2267               return WRONGCODE;
2268             }
2269           free_hstat (&hstat);
2270           xfree_null (dummy);
2271           return NEWLOCATION;
2272         case RETRUNNEEDED:
2273           /* The file was already fully retrieved. */
2274           free_hstat (&hstat);
2275           xfree_null (dummy);
2276           return RETROK;
2277         case RETRFINISHED:
2278           /* Deal with you later.  */
2279           break;
2280         default:
2281           /* All possibilities should have been exhausted.  */
2282           abort ();
2283         }
2284       if (!(*dt & RETROKF))
2285         {
2286           if (!opt.verbose)
2287             {
2288               /* #### Ugly ugly ugly! */
2289               char *hurl = url_string (u, true);
2290               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2291               xfree (hurl);
2292             }
2293           logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2294                      tms, hstat.statcode, escnonprint (hstat.error));
2295           logputs (LOG_VERBOSE, "\n");
2296           free_hstat (&hstat);
2297           xfree_null (dummy);
2298           return WRONGCODE;
2299         }
2300
2301       /* Did we get the time-stamp?  */
2302       if (!got_head)
2303         {
2304           if (opt.timestamping && !hstat.remote_time)
2305             {
2306               logputs (LOG_NOTQUIET, _("\
2307 Last-modified header missing -- time-stamps turned off.\n"));
2308             }
2309           else if (hstat.remote_time)
2310             {
2311               /* Convert the date-string into struct tm.  */
2312               tmr = http_atotm (hstat.remote_time);
2313               if (tmr == (time_t) (-1))
2314                 logputs (LOG_VERBOSE, _("\
2315 Last-modified header invalid -- time-stamp ignored.\n"));
2316             }
2317         }
2318
2319       /* The time-stamping section.  */
2320       if (use_ts)
2321         {
2322           got_head = true;
2323           *dt &= ~HEAD_ONLY;
2324           use_ts = false;               /* no more time-stamping */
2325           count = 0;            /* the retrieve count for HEAD is
2326                                    reset */
2327           if (hstat.remote_time && tmr != (time_t) (-1))
2328             {
2329               /* Now time-stamping can be used validly.  Time-stamping
2330                  means that if the sizes of the local and remote file
2331                  match, and local file is newer than the remote file,
2332                  it will not be retrieved.  Otherwise, the normal
2333                  download procedure is resumed.  */
2334               if (tml >= tmr &&
2335                   (hstat.contlen == -1 || local_size == hstat.contlen))
2336                 {
2337                   logprintf (LOG_VERBOSE, _("\
2338 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2339                              local_filename);
2340                   free_hstat (&hstat);
2341                   xfree_null (dummy);
2342                   return RETROK;
2343                 }
2344               else if (tml >= tmr)
2345                 logprintf (LOG_VERBOSE, _("\
2346 The sizes do not match (local %s) -- retrieving.\n"),
2347                            number_to_static_string (local_size));
2348               else
2349                 logputs (LOG_VERBOSE,
2350                          _("Remote file is newer, retrieving.\n"));
2351             }
2352           free_hstat (&hstat);
2353           continue;
2354         }
2355       if ((tmr != (time_t) (-1))
2356           && !opt.spider
2357           && ((hstat.len == hstat.contlen) ||
2358               ((hstat.res == 0) && (hstat.contlen == -1))))
2359         {
2360           /* #### This code repeats in http.c and ftp.c.  Move it to a
2361              function!  */
2362           const char *fl = NULL;
2363           if (opt.output_document)
2364             {
2365               if (output_stream_regular)
2366                 fl = opt.output_document;
2367             }
2368           else
2369             fl = *hstat.local_file;
2370           if (fl)
2371             touch (fl, tmr);
2372         }
2373       /* End of time-stamping section.  */
2374
2375       if (opt.spider)
2376         {
2377           logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
2378                      escnonprint (hstat.error));
2379           xfree_null (dummy);
2380           return RETROK;
2381         }
2382
2383       tmrate = retr_rate (hstat.rd_size, hstat.dltime);
2384       total_download_time += hstat.dltime;
2385
2386       if (hstat.len == hstat.contlen)
2387         {
2388           if (*dt & RETROKF)
2389             {
2390               logprintf (LOG_VERBOSE,
2391                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2392                          tms, tmrate, locf,
2393                          number_to_static_string (hstat.len),
2394                          number_to_static_string (hstat.contlen));
2395               logprintf (LOG_NONVERBOSE,
2396                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2397                          tms, u->url,
2398                          number_to_static_string (hstat.len),
2399                          number_to_static_string (hstat.contlen),
2400                          locf, count);
2401             }
2402           ++opt.numurls;
2403           total_downloaded_bytes += hstat.len;
2404
2405           /* Remember that we downloaded the file for later ".orig" code. */
2406           if (*dt & ADDED_HTML_EXTENSION)
2407             downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2408           else
2409             downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2410
2411           free_hstat (&hstat);
2412           xfree_null (dummy);
2413           return RETROK;
2414         }
2415       else if (hstat.res == 0) /* No read error */
2416         {
2417           if (hstat.contlen == -1)  /* We don't know how much we were supposed
2418                                        to get, so assume we succeeded. */
2419             {
2420               if (*dt & RETROKF)
2421                 {
2422                   logprintf (LOG_VERBOSE,
2423                              _("%s (%s) - `%s' saved [%s]\n\n"),
2424                              tms, tmrate, locf,
2425                              number_to_static_string (hstat.len));
2426                   logprintf (LOG_NONVERBOSE,
2427                              "%s URL:%s [%s] -> \"%s\" [%d]\n",
2428                              tms, u->url, number_to_static_string (hstat.len),
2429                              locf, count);
2430                 }
2431               ++opt.numurls;
2432               total_downloaded_bytes += hstat.len;
2433
2434               /* Remember that we downloaded the file for later ".orig" code. */
2435               if (*dt & ADDED_HTML_EXTENSION)
2436                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2437               else
2438                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2439
2440               free_hstat (&hstat);
2441               xfree_null (dummy);
2442               return RETROK;
2443             }
2444           else if (hstat.len < hstat.contlen) /* meaning we lost the
2445                                                  connection too soon */
2446             {
2447               logprintf (LOG_VERBOSE,
2448                          _("%s (%s) - Connection closed at byte %s. "),
2449                          tms, tmrate, number_to_static_string (hstat.len));
2450               printwhat (count, opt.ntry);
2451               free_hstat (&hstat);
2452               continue;
2453             }
2454           else
2455             /* Getting here would mean reading more data than
2456                requested with content-length, which we never do.  */
2457             abort ();
2458         }
2459       else                      /* now hstat.res can only be -1 */
2460         {
2461           if (hstat.contlen == -1)
2462             {
2463               logprintf (LOG_VERBOSE,
2464                          _("%s (%s) - Read error at byte %s (%s)."),
2465                          tms, tmrate, number_to_static_string (hstat.len),
2466                          hstat.rderrmsg);
2467               printwhat (count, opt.ntry);
2468               free_hstat (&hstat);
2469               continue;
2470             }
2471           else                  /* hstat.res == -1 and contlen is given */
2472             {
2473               logprintf (LOG_VERBOSE,
2474                          _("%s (%s) - Read error at byte %s/%s (%s). "),
2475                          tms, tmrate,
2476                          number_to_static_string (hstat.len),
2477                          number_to_static_string (hstat.contlen),
2478                          hstat.rderrmsg);
2479               printwhat (count, opt.ntry);
2480               free_hstat (&hstat);
2481               continue;
2482             }
2483         }
2484       /* not reached */
2485     }
2486   while (!opt.ntry || (count < opt.ntry));
2487   return TRYLIMEXC;
2488 }
2489 \f
2490 /* Check whether the result of strptime() indicates success.
2491    strptime() returns the pointer to how far it got to in the string.
2492    The processing has been successful if the string is at `GMT' or
2493    `+X', or at the end of the string.
2494
2495    In extended regexp parlance, the function returns 1 if P matches
2496    "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (which strptime
2497    can return) is considered a failure and 0 is returned.  */
2498 static bool
2499 check_end (const char *p)
2500 {
2501   if (!p)
2502     return false;
2503   while (ISSPACE (*p))
2504     ++p;
2505   if (!*p
2506       || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2507       || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2508     return true;
2509   else
2510     return false;
2511 }
2512
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2616 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
   as well as the time format used in the Set-Cookie header.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as one would like it to be.  Being based
   on strptime, it always allows shortened months, one-digit days,
   etc., but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   Free and PD code was investigated for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues.  Dr. Marcus Hennecke's atotm(), distributed with phttpd, is
   excellent, but cannot be used because it is not assigned to the
   FSF.  Hence strptime.

   LC_TIME is temporarily switched to "C" while parsing.  The name of
   the previous locale is copied before the switch, because the string
   returned by setlocale() may be overwritten or invalidated by a
   later setlocale() call.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations tested.  */

  static const char *const time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  const char *oldlocale;
  char savedlocale[256];
  bool locale_switched = false;
  int i;
  time_t ret = (time_t) -1;

  /* Solaris strptime fails to recognize English month names in
     non-English locales, which we work around by temporarily setting
     locale to C before invoking strptime.

     Keep a private copy of the current locale name: the pointer
     returned by setlocale must not be relied upon once setlocale has
     been called again.  If the name cannot be saved (query failed or
     name too long for the buffer), leave the locale untouched rather
     than switch to one we would be unable to undo.  */
  oldlocale = setlocale (LC_TIME, NULL);
  if (oldlocale)
    {
      size_t len = strlen (oldlocale) + 1; /* including the \0 */
      if (len <= sizeof savedlocale)
        {
          memcpy (savedlocale, oldlocale, len);
          setlocale (LC_TIME, "C");
          locale_switched = true;
        }
    }

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* Some versions of strptime use the existing contents of struct
         tm to recalculate the date according to format.  Zero it out
         to prevent stack garbage from influencing strptime.  */
      xzero (t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        {
          ret = timegm (&t);
          break;
        }
    }

  /* Restore the previous locale from our own copy. */
  if (locale_switched)
    setlocale (LC_TIME, savedlocale);

  return ret;
}
2588 \f
/* Authorization support: We support three authorization schemes:

   * `Basic' scheme, consisting of base64-encoding the USER:PASSWORD
   string;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
   consisting of answering the server's challenge with the proper
   MD5 digests.

   * `NTLM' ("NT LAN Manager") scheme, based on code written by Daniel
   Stenberg for libcurl.  Like digest, NTLM is based on a
   challenge-response mechanism, but unlike digest, it is non-standard
   (authenticates TCP connections rather than requests), undocumented
   and Microsoft-specific.  */
2602
/* Build the value of an authorization header for the `Basic' scheme.

   The credentials are joined as "USER:PASSWD", the result is run
   through base64_encode, and "Basic " is put in front of the encoded
   text.  The scratch buffers live on the stack (alloca); the returned
   string is whatever concat_strings produces.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD" without the terminating \0.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  char *plain = (char *) alloca (plain_len + 1);
  char *encoded;

  sprintf (plain, "%s:%s", user, passwd);

  encoded = (char *) alloca (BASE64_LENGTH (plain_len) + 1);
  base64_encode (plain, plain_len, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
2621
/* Advance the pointer lvalue X past any run of whitespace (as judged
   by ISSPACE).  X is evaluated several times, so it must be free of
   side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
2626
2627 #ifdef ENABLE_DIGEST
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of a field in such a header.  If the field is the
   one named by ATTR_NAME ("realm", "opaque", and "nonce" are used by
   the current digest authorization code), its quoted value is copied
   into freshly allocated storage and stored in the (char *) variable
   pointed to by RET; any string previously held there is freed.

   Return value:
     > 0  the number of bytes parsed by this call, up to and including
          the closing quote of the value;
       0  the field at AU is not ATTR_NAME (*RET is left untouched);
     < 0  the field is ATTR_NAME but is malformed (missing `=', value
          not enclosed in double quotes, or unterminated quote).

   The comparison of the field name is case-sensitive.  A field whose
   name merely begins with ATTR_NAME (e.g. "noncecount" when looking
   for "nonce") is a different field, so it yields 0 rather than being
   reported as malformed.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  const size_t name_len = strlen (attr_name);
  const char *cp = au;
  const char *ep;

  if (strncmp (cp, attr_name, name_len) != 0)
    return 0;
  cp += name_len;

  /* The name must end here: only whitespace, `=' or the end of the
     string may follow.  Anything else means AU holds a longer,
     unrelated attribute name that shares our prefix.  */
  if (*cp && *cp != '=' && !ISSPACE (*cp))
    return 0;

  if (!*cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '=')
    return -1;
  if (!*++cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '\"')
    return -1;
  if (!*++cp)
    return -1;

  /* CP is now at the first character of the value; find the closing
     quote.  */
  for (ep = cp; *ep && *ep != '\"'; ep++)
    ;
  if (!*ep)
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (cp, ep);
  return ep - au + 1;
}
2666
2667 /* Dump the hexadecimal representation of HASH to BUF.  HASH should be
2668    an array of 16 bytes containing the hash keys, and BUF should be a
2669    buffer of 33 writable characters (32 for hex digits plus one for
2670    zero termination).  */
2671 static void
2672 dump_hash (char *buf, const unsigned char *hash)
2673 {
2674   int i;
2675
2676   for (i = 0; i < MD5_HASHLEN; i++, hash++)
2677     {
2678       *buf++ = XNUM_TO_digit (*hash >> 4);
2679       *buf++ = XNUM_TO_digit (*hash & 0xf);
2680     }
2681   *buf = '\0';
2682 }
2683
2684 /* Take the line apart to find the challenge, and compose a digest
2685    authorization header.  See RFC2069 section 2.1.2.  */
2686 static char *
2687 digest_authentication_encode (const char *au, const char *user,
2688                               const char *passwd, const char *method,
2689                               const char *path)
2690 {
2691   static char *realm, *opaque, *nonce;
2692   static struct {
2693     const char *name;
2694     char **variable;
2695   } options[] = {
2696     { "realm", &realm },
2697     { "opaque", &opaque },
2698     { "nonce", &nonce }
2699   };
2700   char *res;
2701
2702   realm = opaque = nonce = NULL;
2703
2704   au += 6;                      /* skip over `Digest' */
2705   while (*au)
2706     {
2707       int i;
2708
2709       SKIP_WS (au);
2710       for (i = 0; i < countof (options); i++)
2711         {
2712           int skip = extract_header_attr (au, options[i].name,
2713                                           options[i].variable);
2714           if (skip < 0)
2715             {
2716               xfree_null (realm);
2717               xfree_null (opaque);
2718               xfree_null (nonce);
2719               return NULL;
2720             }
2721           else if (skip)
2722             {
2723               au += skip;
2724               break;
2725             }
2726         }
2727       if (i == countof (options))
2728         {
2729           while (*au && *au != '=')
2730             au++;
2731           if (*au && *++au)
2732             {
2733               SKIP_WS (au);
2734               if (*au == '\"')
2735                 {
2736                   au++;
2737                   while (*au && *au != '\"')
2738                     au++;
2739                   if (*au)
2740                     au++;
2741                 }
2742             }
2743         }
2744       while (*au && *au != ',')
2745         au++;
2746       if (*au)
2747         au++;
2748     }
2749   if (!realm || !nonce || !user || !passwd || !path || !method)
2750     {
2751       xfree_null (realm);
2752       xfree_null (opaque);
2753       xfree_null (nonce);
2754       return NULL;
2755     }
2756
2757   /* Calculate the digest value.  */
2758   {
2759     ALLOCA_MD5_CONTEXT (ctx);
2760     unsigned char hash[MD5_HASHLEN];
2761     char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2762     char response_digest[MD5_HASHLEN * 2 + 1];
2763
2764     /* A1BUF = H(user ":" realm ":" password) */
2765     gen_md5_init (ctx);
2766     gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2767     gen_md5_update ((unsigned char *)":", 1, ctx);
2768     gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2769     gen_md5_update ((unsigned char *)":", 1, ctx);
2770     gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2771     gen_md5_finish (ctx, hash);
2772     dump_hash (a1buf, hash);
2773
2774     /* A2BUF = H(method ":" path) */
2775     gen_md5_init (ctx);
2776     gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2777     gen_md5_update ((unsigned char *)":", 1, ctx);
2778     gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2779     gen_md5_finish (ctx, hash);
2780     dump_hash (a2buf, hash);
2781
2782     /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2783     gen_md5_init (ctx);
2784     gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
2785     gen_md5_update ((unsigned char *)":", 1, ctx);
2786     gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2787     gen_md5_update ((unsigned char *)":", 1, ctx);
2788     gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
2789     gen_md5_finish (ctx, hash);
2790     dump_hash (response_digest, hash);
2791
2792     res = xmalloc (strlen (user)
2793                    + strlen (user)
2794                    + strlen (realm)
2795                    + strlen (nonce)
2796                    + strlen (path)
2797                    + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2798                    + (opaque ? strlen (opaque) : 0)
2799                    + 128);
2800     sprintf (res, "Digest \
2801 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2802              user, realm, nonce, path, response_digest);
2803     if (opaque)
2804       {
2805         char *p = res + strlen (res);
2806         strcat (p, ", opaque=\"");
2807         strcat (p, opaque);
2808         strcat (p, "\"");
2809       }
2810   }
2811   return res;
2812 }
2813 #endif /* ENABLE_DIGEST */
2814
/* Number of characters in a string literal, not counting the
   terminating \0 that sizeof includes: the size of the literal minus
   the size of an empty literal, which consists of the \0 alone.  */
#define STRSIZE(literal) (sizeof (literal) - sizeof "")
2818
/* True when the characters in [b, e) start with the string literal
   LITERAL, compared without regard to case, and the match is followed
   either by the end of the range or by a whitespace character.  The
   length test comes first so that nothing past E is examined.  */
#define STARTS(literal, b, e)                                   \
  ((e) - (b) >= STRSIZE (literal)                               \
   && strncasecmp ((b), (literal), STRSIZE (literal)) == 0      \
   && ((e) - (b) == STRSIZE (literal)                           \
       || ISSPACE ((b)[STRSIZE (literal)])))

/* Report whether the header value in [HDRBEG, HDREND) names an
   authentication scheme this build can answer: `Basic' always,
   `Digest' when ENABLE_DIGEST is defined, and `NTLM' when ENABLE_NTLM
   is defined.  */
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return true;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return true;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return true;
#endif
  return false;
}

#undef STARTS
2842
/* Produce the HTTP authorization request header that answers the
   `WWW-Authenticate' response header AU.

   The scheme is chosen from the first letter of AU: `Basic', and --
   when compiled in -- `Digest' (ENABLE_DIGEST) and `NTLM'
   (ENABLE_NTLM).  USER and PASSWD are the credentials; METHOD and
   PATH are passed on to the digest encoder only.

   *FINISHED is set to true for Basic and Digest, and when ntlm_input
   rejects the NTLM header (in which case NULL is returned); otherwise
   ntlm_output is given FINISHED to set as it sees fit.

   The caller must pass only values approved by
   known_authentication_scheme_p; any other scheme aborts.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, bool *finished)
{
  char *line = NULL;

  /* Only known schemes reach us, so the first letter is enough to
     tell them apart.  */
  switch (TOUPPER (*au))
    {
    case 'B':                   /* Basic */
      *finished = true;
      line = basic_authentication_encode (user, passwd);
      break;
#ifdef ENABLE_DIGEST
    case 'D':                   /* Digest */
      *finished = true;
      line = digest_authentication_encode (au, user, passwd, method, path);
      break;
#endif
#ifdef ENABLE_NTLM
    case 'N':                   /* NTLM */
      if (ntlm_input (&pconn.ntlm, au))
        line = ntlm_output (&pconn.ntlm, user, passwd, finished);
      else
        *finished = true;       /* bad NTLM header: give up, LINE stays NULL */
      break;
#endif
    default:
      /* Unreachable as long as callers check the scheme with
         known_authentication_scheme_p first.  */
      abort ();
    }

  return line;
}
2880 \f
2881 void
2882 save_cookies (void)
2883 {
2884   if (wget_cookie_jar)
2885     cookie_jar_save (wget_cookie_jar, opt.cookies_output);
2886 }
2887
2888 void
2889 http_cleanup (void)
2890 {
2891   xfree_null (pconn.host);
2892   if (wget_cookie_jar)
2893     cookie_jar_delete (wget_cookie_jar);
2894 }