sjero.net Git - wget/blob - src/http.c

   1 /* HTTP support.
   2    Copyright (C) 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9  (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif
  38 #include <assert.h>
  39 #include <errno.h>
  40 #include <time.h>
  41
  42 #include "wget.h"
  43 #include "utils.h"
  44 #include "url.h"
  45 #include "host.h"
  46 #include "retr.h"
  47 #include "connect.h"
  48 #include "netrc.h"
  49 #ifdef HAVE_SSL
  50 # include "ssl.h"
  51 #endif
  52 #ifdef ENABLE_NTLM
  53 # include "http-ntlm.h"
  54 #endif
  55 #include "cookies.h"
  56 #ifdef ENABLE_DIGEST
  57 # include "gen-md5.h"
  58 #endif
  59 #include "convert.h"
  60
/* External variables used by the HTTP code; their definitions are not
   part of this section of the file.  */
extern char *version_string;
extern LARGE_INT total_downloaded_bytes;

extern FILE *output_stream;
extern int output_stream_regular;

/* Fallback definition of MIN, used only when no earlier header
   provided one.  Each argument may be evaluated twice, so do not pass
   expressions with side effects.  */
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
  70
  71 \f
/* File-scope cookie state.  NOTE(review): presumably a "cookies have
   been loaded" flag and the jar shared by all requests -- confirm
   against the code that uses them further down in the file.  */
static int cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;

/* Content types, as strings, for HTML and XHTML documents.  */
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"

/* Some status code validation macros.  H_20X matches any 2xx code,
   H_PARTIAL matches only 206, and H_REDIRECTED matches 301, 302, 303
   and 307 (but not 300 or 304).  The macros evaluate X more than
   once.  */
#define H_20X(x)        (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY           \
                         || (x) == HTTP_STATUS_MOVED_TEMPORARILY        \
                         || (x) == HTTP_STATUS_SEE_OTHER                \
                         || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)

/* HTTP/1.0 status codes from RFC1945, provided for reference.  The
   codes marked "from HTTP/1.1" are not part of RFC1945.  */
/* Successful 2xx.  */
#define HTTP_STATUS_OK                  200
#define HTTP_STATUS_CREATED             201
#define HTTP_STATUS_ACCEPTED            202
#define HTTP_STATUS_NO_CONTENT          204
#define HTTP_STATUS_PARTIAL_CONTENTS    206

/* Redirection 3xx.  */
#define HTTP_STATUS_MULTIPLE_CHOICES    300
#define HTTP_STATUS_MOVED_PERMANENTLY   301
#define HTTP_STATUS_MOVED_TEMPORARILY   302
#define HTTP_STATUS_SEE_OTHER           303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED        304
#define HTTP_STATUS_TEMPORARY_REDIRECT  307 /* from HTTP/1.1 */

/* Client error 4xx.  */
#define HTTP_STATUS_BAD_REQUEST         400
#define HTTP_STATUS_UNAUTHORIZED        401
#define HTTP_STATUS_FORBIDDEN           403
#define HTTP_STATUS_NOT_FOUND           404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416

/* Server errors 5xx.  */
#define HTTP_STATUS_INTERNAL            500
#define HTTP_STATUS_NOT_IMPLEMENTED     501
#define HTTP_STATUS_BAD_GATEWAY         502
#define HTTP_STATUS_UNAVAILABLE         503
 114 \f
/* Release policy of a request header: which of its NAME and VALUE
   strings release_header should free (none, name only, value only,
   or both).  */
enum rp {
  rel_none, rel_name, rel_value, rel_both
};
 118
/* An HTTP request under construction: the request line (METHOD and
   ARG) plus a growable array of headers.  */
struct request {
  const char *method;           /* not freed by request_free */
  char *arg;                    /* freed by request_free */

  /* One "NAME: VALUE" header line, with its release policy.  */
  struct request_header {
    char *name, *value;
    enum rp release_policy;
  } *headers;
  /* Number of headers in use, and number of slots allocated.  */
  int hcount, hcapacity;
};
 129
 130 /* Create a new, empty request.  At least request_set_method must be
 131    called before the request can be used.  */
 132
 133 static struct request *
 134 request_new (void)
 135 {
 136   struct request *req = xnew0 (struct request);
 137   req->hcapacity = 8;
 138   req->headers = xnew_array (struct request_header, req->hcapacity);
 139   return req;
 140 }
 141
 142 /* Set the request's method and its arguments.  METH should be a
 143    literal string (or it should outlive the request) because it will
 144    not be freed.  ARG will be freed by request_free.  */
 145
 146 static void
 147 request_set_method (struct request *req, const char *meth, char *arg)
 148 {
 149   req->method = meth;
 150   req->arg = arg;
 151 }
 152
 153 /* Return the method string passed with the last call to
 154    request_set_method.  */
 155
 156 static const char *
 157 request_method (const struct request *req)
 158 {
 159   return req->method;
 160 }
 161
 162 /* Free one header according to the release policy specified with
 163    request_set_header.  */
 164
 165 static void
 166 release_header (struct request_header *hdr)
 167 {
 168   switch (hdr->release_policy)
 169     {
 170     case rel_none:
 171       break;
 172     case rel_name:
 173       xfree (hdr->name);
 174       break;
 175     case rel_value:
 176       xfree (hdr->value);
 177       break;
 178     case rel_both:
 179       xfree (hdr->name);
 180       xfree (hdr->value);
 181       break;
 182     }
 183 }
 184
 185 /* Set the request named NAME to VALUE.  Specifically, this means that
 186    a "NAME: VALUE\r\n" header line will be used in the request.  If a
 187    header with the same name previously existed in the request, its
 188    value will be replaced by this one.  A NULL value means do nothing.
 189
 190    RELEASE_POLICY determines whether NAME and VALUE should be released
 191    (freed) with request_free.  Allowed values are:
 192
 193     - rel_none     - don't free NAME or VALUE
 194     - rel_name     - free NAME when done
 195     - rel_value    - free VALUE when done
 196     - rel_both     - free both NAME and VALUE when done
 197
 198    Setting release policy is useful when arguments come from different
 199    sources.  For example:
 200
 201      // Don't free literal strings!
 202      request_set_header (req, "Pragma", "no-cache", rel_none);
 203
 204      // Don't free a global variable, we'll need it later.
 205      request_set_header (req, "Referer", opt.referer, rel_none);
 206
 207      // Value freshly allocated, free it when done.
 208      request_set_header (req, "Range",
 209                          aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
 210                          rel_value);
 211    */
 212
 213 static void
 214 request_set_header (struct request *req, char *name, char *value,
 215                     enum rp release_policy)
 216 {
 217   struct request_header *hdr;
 218   int i;
 219
 220   if (!value)
 221     {
 222       /* A NULL value is a no-op; if freeing the name is requested,
 223          free it now to avoid leaks.  */
 224       if (release_policy == rel_name || release_policy == rel_both)
 225         xfree (name);
 226       return;
 227     }
 228
 229   for (i = 0; i < req->hcount; i++)
 230     {
 231       hdr = &req->headers[i];
 232       if (0 == strcasecmp (name, hdr->name))
 233         {
 234           /* Replace existing header. */
 235           release_header (hdr);
 236           hdr->name = name;
 237           hdr->value = value;
 238           hdr->release_policy = release_policy;
 239           return;
 240         }
 241     }
 242
 243   /* Install new header. */
 244
 245   if (req->hcount >= req->hcapacity)
 246     {
 247       req->hcapacity <<= 1;
 248       req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
 249     }
 250   hdr = &req->headers[req->hcount++];
 251   hdr->name = name;
 252   hdr->value = value;
 253   hdr->release_policy = release_policy;
 254 }
 255
 256 /* Like request_set_header, but sets the whole header line, as
 257    provided by the user using the `--header' option.  For example,
 258    request_set_user_header (req, "Foo: bar") works just like
 259    request_set_header (req, "Foo", "bar").  */
 260
 261 static void
 262 request_set_user_header (struct request *req, const char *header)
 263 {
 264   char *name;
 265   const char *p = strchr (header, ':');
 266   if (!p)
 267     return;
 268   BOUNDED_TO_ALLOCA (header, p, name);
 269   ++p;
 270   while (ISSPACE (*p))
 271     ++p;
 272   request_set_header (req, xstrdup (name), (char *) p, rel_name);
 273 }
 274
 275 /* Remove the header with specified name from REQ.  Returns 1 if the
 276    header was actually removed, 0 otherwise.  */
 277
 278 static int
 279 request_remove_header (struct request *req, char *name)
 280 {
 281   int i;
 282   for (i = 0; i < req->hcount; i++)
 283     {
 284       struct request_header *hdr = &req->headers[i];
 285       if (0 == strcasecmp (name, hdr->name))
 286         {
 287           release_header (hdr);
 288           /* Move the remaining headers by one. */
 289           if (i < req->hcount - 1)
 290             memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
 291           --req->hcount;
 292           return 1;
 293         }
 294     }
 295   return 0;
 296 }
 297
 298 #define APPEND(p, str) do {                     \
 299   int A_len = strlen (str);                     \
 300   memcpy (p, str, A_len);                       \
 301   p += A_len;                                   \
 302 } while (0)
 303
 304 /* Construct the request and write it to FD using fd_write.  */
 305
 306 static int
 307 request_send (const struct request *req, int fd)
 308 {
 309   char *request_string, *p;
 310   int i, size, write_error;
 311
 312   /* Count the request size. */
 313   size = 0;
 314
 315   /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
 316   size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
 317
 318   for (i = 0; i < req->hcount; i++)
 319     {
 320       struct request_header *hdr = &req->headers[i];
 321       /* NAME ": " VALUE "\r\n" */
 322       size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
 323     }
 324
 325   /* "\r\n\0" */
 326   size += 3;
 327
 328   p = request_string = alloca_array (char, size);
 329
 330   /* Generate the request. */
 331
 332   APPEND (p, req->method); *p++ = ' ';
 333   APPEND (p, req->arg);    *p++ = ' ';
 334   memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
 335
 336   for (i = 0; i < req->hcount; i++)
 337     {
 338       struct request_header *hdr = &req->headers[i];
 339       APPEND (p, hdr->name);
 340       *p++ = ':', *p++ = ' ';
 341       APPEND (p, hdr->value);
 342       *p++ = '\r', *p++ = '\n';
 343     }
 344
 345   *p++ = '\r', *p++ = '\n', *p++ = '\0';
 346   assert (p - request_string == size);
 347
 348 #undef APPEND
 349
 350   DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
 351
 352   /* Send the request to the server. */
 353
 354   write_error = fd_write (fd, request_string, size - 1, -1);
 355   if (write_error < 0)
 356     logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
 357                strerror (errno));
 358   return write_error;
 359 }
 360
 361 /* Release the resources used by REQ. */
 362
 363 static void
 364 request_free (struct request *req)
 365 {
 366   int i;
 367   xfree_null (req->arg);
 368   for (i = 0; i < req->hcount; i++)
 369     release_header (&req->headers[i]);
 370   xfree_null (req->headers);
 371   xfree (req);
 372 }
 373
 374 /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
 375    PROMISED_SIZE bytes are sent over the wire -- if the file is
 376    longer, read only that much; if the file is shorter, report an error.  */
 377
 378 static int
 379 post_file (int sock, const char *file_name, wgint promised_size)
 380 {
 381   static char chunk[8192];
 382   wgint written = 0;
 383   int write_error;
 384   FILE *fp;
 385
 386   DEBUGP (("[writing POST file %s ... ", file_name));
 387
 388   fp = fopen (file_name, "rb");
 389   if (!fp)
 390     return -1;
 391   while (!feof (fp) && written < promised_size)
 392     {
 393       int towrite;
 394       int length = fread (chunk, 1, sizeof (chunk), fp);
 395       if (length == 0)
 396         break;
 397       towrite = MIN (promised_size - written, length);
 398       write_error = fd_write (sock, chunk, towrite, -1);
 399       if (write_error < 0)
 400         {
 401           fclose (fp);
 402           return -1;
 403         }
 404       written += towrite;
 405     }
 406   fclose (fp);
 407
 408   /* If we've written less than was promised, report a (probably
 409      nonsensical) error rather than break the promise.  */
 410   if (written < promised_size)
 411     {
 412       errno = EINVAL;
 413       return -1;
 414     }
 415
 416   assert (written == promised_size);
 417   DEBUGP (("done]\n"));
 418   return 0;
 419 }
 420 \f
 421 static const char *
 422 response_head_terminator (const char *hunk, int oldlen, int peeklen)
 423 {
 424   const char *start, *end;
 425
 426   /* If at first peek, verify whether HUNK starts with "HTTP".  If
 427      not, this is a HTTP/0.9 request and we must bail out without
 428      reading anything.  */
 429   if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
 430     return hunk;
 431
 432   if (oldlen < 4)
 433     start = hunk;
 434   else
 435     start = hunk + oldlen - 4;
 436   end = hunk + oldlen + peeklen;
 437
 438   for (; start < end - 1; start++)
 439     if (*start == '\n')
 440       {
 441         if (start < end - 2
 442             && start[1] == '\r'
 443             && start[2] == '\n')
 444           return start + 3;
 445         if (start[1] == '\n')
 446           return start + 2;
 447       }
 448   return NULL;
 449 }
 450
 451 /* The maximum size of a single HTTP response we care to read.  This
 452    is not meant to impose an arbitrary limit, but to protect the user
 453    from Wget slurping up available memory upon encountering malicious
 454    or buggy server output.  Define it to 0 to remove the limit.  */
 455
 456 #define HTTP_RESPONSE_MAX_SIZE 65536
 457
 458 /* Read the HTTP request head from FD and return it.  The error
 459    conditions are the same as with fd_read_hunk.
 460
 461    To support HTTP/0.9 responses, this function tries to make sure
 462    that the data begins with "HTTP".  If this is not the case, no data
 463    is read and an empty request is returned, so that the remaining
 464    data can be treated as body.  */
 465
 466 static char *
 467 read_http_response_head (int fd)
 468 {
 469   return fd_read_hunk (fd, response_head_terminator, 512,
 470                        HTTP_RESPONSE_MAX_SIZE);
 471 }
 472
/* A parsed HTTP response head.  Created by resp_new and released
   with resp_free.  */
struct response {
  /* The response data.  resp_free does not free it.  */
  const char *data;

  /* The array of pointers that indicate where each header starts.
     For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     The headers are located like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
     ^                   ^                             ^          ^
     headers[0]          headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response (the
     status line), headers[1] points to the end of the status line
     and the beginning of the first header, etc.  The array is
     terminated with a NULL pointer.  It is left NULL for a
     headerless (HTTP/0.9) response.  */

  const char **headers;
};
 497
 498 /* Create a new response object from the text of the HTTP response,
 499    available in HEAD.  That text is automatically split into
 500    constituent header lines for fast retrieval using
 501    resp_header_*.  */
 502
 503 static struct response *
 504 resp_new (const char *head)
 505 {
 506   const char *hdr;
 507   int count, size;
 508
 509   struct response *resp = xnew0 (struct response);
 510   resp->data = head;
 511
 512   if (*head == '\0')
 513     {
 514       /* Empty head means that we're dealing with a headerless
 515          (HTTP/0.9) response.  In that case, don't set HEADERS at
 516          all.  */
 517       return resp;
 518     }
 519
 520   /* Split HEAD into header lines, so that resp_header_* functions
 521      don't need to do this over and over again.  */
 522
 523   size = count = 0;
 524   hdr = head;
 525   while (1)
 526     {
 527       DO_REALLOC (resp->headers, size, count + 1, const char *);
 528       resp->headers[count++] = hdr;
 529
 530       /* Break upon encountering an empty line. */
 531       if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
 532         break;
 533
 534       /* Find the end of HDR, including continuations. */
 535       do
 536         {
 537           const char *end = strchr (hdr, '\n');
 538           if (end)
 539             hdr = end + 1;
 540           else
 541             hdr += strlen (hdr);
 542         }
 543       while (*hdr == ' ' || *hdr == '\t');
 544     }
 545   DO_REALLOC (resp->headers, size, count + 1, const char *);
 546   resp->headers[count] = NULL;
 547
 548   return resp;
 549 }
 550
 551 /* Locate the header named NAME in the request data, starting with
 552    position START.  This allows the code to loop through the request
 553    data, filtering for all requests of a given name.  Returns the
 554    found position, or -1 for failure.  The code that uses this
 555    function typically looks like this:
 556
 557      for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
 558        ... do something with header ...
 559
 560    If you only care about one header, use resp_header_get instead of
 561    this function.  */
 562
 563 static int
 564 resp_header_locate (const struct response *resp, const char *name, int start,
 565                     const char **begptr, const char **endptr)
 566 {
 567   int i;
 568   const char **headers = resp->headers;
 569   int name_len;
 570
 571   if (!headers || !headers[1])
 572     return -1;
 573
 574   name_len = strlen (name);
 575   if (start > 0)
 576     i = start;
 577   else
 578     i = 1;
 579
 580   for (; headers[i + 1]; i++)
 581     {
 582       const char *b = headers[i];
 583       const char *e = headers[i + 1];
 584       if (e - b > name_len
 585           && b[name_len] == ':'
 586           && 0 == strncasecmp (b, name, name_len))
 587         {
 588           b += name_len + 1;
 589           while (b < e && ISSPACE (*b))
 590             ++b;
 591           while (b < e && ISSPACE (e[-1]))
 592             --e;
 593           *begptr = b;
 594           *endptr = e;
 595           return i;
 596         }
 597     }
 598   return -1;
 599 }
 600
 601 /* Find and retrieve the header named NAME in the request data.  If
 602    found, set *BEGPTR to its starting, and *ENDPTR to its ending
 603    position, and return 1.  Otherwise return 0.
 604
 605    This function is used as a building block for resp_header_copy
 606    and resp_header_strdup.  */
 607
 608 static int
 609 resp_header_get (const struct response *resp, const char *name,
 610                  const char **begptr, const char **endptr)
 611 {
 612   int pos = resp_header_locate (resp, name, 0, begptr, endptr);
 613   return pos != -1;
 614 }
 615
 616 /* Copy the response header named NAME to buffer BUF, no longer than
 617    BUFSIZE (BUFSIZE includes the terminating 0).  If the header
 618    exists, 1 is returned, otherwise 0.  If there should be no limit on
 619    the size of the header, use resp_header_strdup instead.
 620
 621    If BUFSIZE is 0, no data is copied, but the boolean indication of
 622    whether the header is present is still returned.  */
 623
 624 static int
 625 resp_header_copy (const struct response *resp, const char *name,
 626                   char *buf, int bufsize)
 627 {
 628   const char *b, *e;
 629   if (!resp_header_get (resp, name, &b, &e))
 630     return 0;
 631   if (bufsize)
 632     {
 633       int len = MIN (e - b, bufsize - 1);
 634       memcpy (buf, b, len);
 635       buf[len] = '\0';
 636     }
 637   return 1;
 638 }
 639
 640 /* Return the value of header named NAME in RESP, allocated with
 641    malloc.  If such a header does not exist in RESP, return NULL.  */
 642
 643 static char *
 644 resp_header_strdup (const struct response *resp, const char *name)
 645 {
 646   const char *b, *e;
 647   if (!resp_header_get (resp, name, &b, &e))
 648     return NULL;
 649   return strdupdelim (b, e);
 650 }
 651
 652 /* Parse the HTTP status line, which is of format:
 653
 654    HTTP-Version SP Status-Code SP Reason-Phrase
 655
 656    The function returns the status-code, or -1 if the status line
 657    appears malformed.  The pointer to "reason-phrase" message is
 658    returned in *MESSAGE.  */
 659
 660 static int
 661 resp_status (const struct response *resp, char **message)
 662 {
 663   int status;
 664   const char *p, *end;
 665
 666   if (!resp->headers)
 667     {
 668       /* For a HTTP/0.9 response, assume status 200. */
 669       if (message)
 670         *message = xstrdup (_("No headers, assuming HTTP/0.9"));
 671       return 200;
 672     }
 673
 674   p = resp->headers[0];
 675   end = resp->headers[1];
 676
 677   if (!end)
 678     return -1;
 679
 680   /* "HTTP" */
 681   if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
 682     return -1;
 683   p += 4;
 684
 685   /* Match the HTTP version.  This is optional because Gnutella
 686      servers have been reported to not specify HTTP version.  */
 687   if (p < end && *p == '/')
 688     {
 689       ++p;
 690       while (p < end && ISDIGIT (*p))
 691         ++p;
 692       if (p < end && *p == '.')
 693         ++p;
 694       while (p < end && ISDIGIT (*p))
 695         ++p;
 696     }
 697
 698   while (p < end && ISSPACE (*p))
 699     ++p;
 700   if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
 701     return -1;
 702
 703   status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
 704   p += 3;
 705
 706   if (message)
 707     {
 708       while (p < end && ISSPACE (*p))
 709         ++p;
 710       while (p < end && ISSPACE (end[-1]))
 711         --end;
 712       *message = strdupdelim (p, end);
 713     }
 714
 715   return status;
 716 }
 717
 718 /* Release the resources used by RESP.  */
 719
 720 static void
 721 resp_free (struct response *resp)
 722 {
 723   xfree_null (resp->headers);
 724   xfree (resp);
 725 }
 726
 727 /* Print the server response, line by line, omitting the trailing CRLF
 728    from individual header lines, and prefixed with PREFIX.  */
 729
 730 static void
 731 print_server_response (const struct response *resp, const char *prefix)
 732 {
 733   int i;
 734   if (!resp->headers)
 735     return;
 736   for (i = 0; resp->headers[i + 1]; i++)
 737     {
 738       const char *b = resp->headers[i];
 739       const char *e = resp->headers[i + 1];
 740       /* Skip CRLF */
 741       if (b < e && e[-1] == '\n')
 742         --e;
 743       if (b < e && e[-1] == '\r')
 744         --e;
 745       /* This is safe even on printfs with broken handling of "%.<n>s"
 746          because resp->headers ends with \0.  */
 747       logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
 748     }
 749 }
 750
 751 /* Parse the `Content-Range' header and extract the information it
 752    contains.  Returns 1 if successful, -1 otherwise.  */
 753 static int
 754 parse_content_range (const char *hdr, wgint *first_byte_ptr,
 755                      wgint *last_byte_ptr, wgint *entity_length_ptr)
 756 {
 757   wgint num;
 758
 759   /* Ancient versions of Netscape proxy server, presumably predating
 760      rfc2068, sent out `Content-Range' without the "bytes"
 761      specifier.  */
 762   if (!strncasecmp (hdr, "bytes", 5))
 763     {
 764       hdr += 5;
 765       /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
 766          HTTP spec. */
 767       if (*hdr == ':')
 768         ++hdr;
 769       while (ISSPACE (*hdr))
 770         ++hdr;
 771       if (!*hdr)
 772         return 0;
 773     }
 774   if (!ISDIGIT (*hdr))
 775     return 0;
 776   for (num = 0; ISDIGIT (*hdr); hdr++)
 777     num = 10 * num + (*hdr - '0');
 778   if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
 779     return 0;
 780   *first_byte_ptr = num;
 781   ++hdr;
 782   for (num = 0; ISDIGIT (*hdr); hdr++)
 783     num = 10 * num + (*hdr - '0');
 784   if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
 785     return 0;
 786   *last_byte_ptr = num;
 787   ++hdr;
 788   for (num = 0; ISDIGIT (*hdr); hdr++)
 789     num = 10 * num + (*hdr - '0');
 790   *entity_length_ptr = num;
 791   return 1;
 792 }
 793
 794 /* Read the body of the request, but don't store it anywhere and don't
 795    display a progress gauge.  This is useful for reading the bodies of
 796    administrative responses to which we will soon issue another
 797    request.  The response is not useful to the user, but reading it
 798    allows us to continue using the same connection to the server.
 799
 800    If reading fails, 0 is returned, non-zero otherwise.  In debug
 801    mode, the body is displayed for debugging purposes.  */
 802
 803 static int
 804 skip_short_body (int fd, wgint contlen)
 805 {
 806   enum {
 807     SKIP_SIZE = 512,            /* size of the download buffer */
 808     SKIP_THRESHOLD = 4096       /* the largest size we read */
 809   };
 810   char dlbuf[SKIP_SIZE + 1];
 811   dlbuf[SKIP_SIZE] = '\0';      /* so DEBUGP can safely print it */
 812
 813   /* We shouldn't get here with unknown contlen.  (This will change
 814      with HTTP/1.1, which supports "chunked" transfer.)  */
 815   assert (contlen != -1);
 816
 817   /* If the body is too large, it makes more sense to simply close the
 818      connection than to try to read the body.  */
 819   if (contlen > SKIP_THRESHOLD)
 820     return 0;
 821
 822   DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
 823
 824   while (contlen > 0)
 825     {
 826       int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
 827       if (ret <= 0)
 828         {
 829           /* Don't normally report the error since this is an
 830              optimization that should be invisible to the user.  */
 831           DEBUGP (("] aborting (%s).\n",
 832                    ret < 0 ? strerror (errno) : "EOF received"));
 833           return 0;
 834         }
 835       contlen -= ret;
 836       /* Safe even if %.*s bogusly expects terminating \0 because
 837          we've zero-terminated dlbuf above.  */
 838       DEBUGP (("%.*s", ret, dlbuf));
 839     }
 840
 841   DEBUGP (("] done.\n"));
 842   return 1;
 843 }
 844 \f
/* Persistent connections.  Currently, we cache the most recently used
   connection as persistent, provided that the HTTP server agrees to
   make it such.  The persistence data is stored in the variables
   below.  Ideally, it should be possible to cache an arbitrary fixed
   number of these connections.  */

/* Whether a persistent connection is active.  The contents of pconn
   are meaningful only while this is non-zero.  */
static int pconn_active;

static struct {
  /* The socket of the connection.  */
  int socket;

  /* Host and port of the currently active persistent connection.
     HOST is a private copy made by register_persistent and freed by
     invalidate_persistent.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  int ssl;

  /* Whether the connection was authorized.  This is only done by
     NTLM, which authorizes *connections* rather than individual
     requests.  (That practice is peculiar for HTTP, but it is a
     useful optimization.)  */
  int authorized;

#ifdef ENABLE_NTLM
  /* NTLM data of the current connection.  */
  struct ntlmdata ntlm;
#endif
} pconn;
 876
 877 /* Mark the persistent connection as invalid and free the resources it
 878    uses.  This is used by the CLOSE_* macros after they forcefully
 879    close a registered persistent connection.  */
 880
 881 static void
 882 invalidate_persistent (void)
 883 {
 884   DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
 885   pconn_active = 0;
 886   fd_close (pconn.socket);
 887   xfree (pconn.host);
 888   xzero (pconn);
 889 }
 890
 891 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
 892    persistent.  This will enable someone to use the same connection
 893    later.  In the context of HTTP, this must be called only AFTER the
 894    response has been received and the server has promised that the
 895    connection will remain alive.
 896
 897    If a previous connection was persistent, it is closed. */
 898
 899 static void
 900 register_persistent (const char *host, int port, int fd, int ssl)
 901 {
 902   if (pconn_active)
 903     {
 904       if (pconn.socket == fd)
 905         {
 906           /* The connection FD is already registered. */
 907           return;
 908         }
 909       else
 910         {
 911           /* The old persistent connection is still active; close it
 912              first.  This situation arises whenever a persistent
 913              connection exists, but we then connect to a different
 914              host, and try to register a persistent connection to that
 915              one.  */
 916           invalidate_persistent ();
 917         }
 918     }
 919
 920   pconn_active = 1;
 921   pconn.socket = fd;
 922   pconn.host = xstrdup (host);
 923   pconn.port = port;
 924   pconn.ssl = ssl;
 925   pconn.authorized = 0;
 926
 927   DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
 928 }
 929
 930 /* Return non-zero if a persistent connection is available for
 931    connecting to HOST:PORT.  */
 932
 933 static int
 934 persistent_available_p (const char *host, int port, int ssl,
 935                         int *host_lookup_failed)
 936 {
 937   /* First, check whether a persistent connection is active at all.  */
 938   if (!pconn_active)
 939     return 0;
 940
 941   /* If we want SSL and the last connection wasn't or vice versa,
 942      don't use it.  Checking for host and port is not enough because
 943      HTTP and HTTPS can apparently coexist on the same port.  */
 944   if (ssl != pconn.ssl)
 945     return 0;
 946
 947   /* If we're not connecting to the same port, we're not interested. */
 948   if (port != pconn.port)
 949     return 0;
 950
 951   /* If the host is the same, we're in business.  If not, there is
 952      still hope -- read below.  */
 953   if (0 != strcasecmp (host, pconn.host))
 954     {
 955       /* Check if pconn.socket is talking to HOST under another name.
 956          This happens often when both sites are virtual hosts
 957          distinguished only by name and served by the same network
 958          interface, and hence the same web server (possibly set up by
 959          the ISP and serving many different web sites).  This
 960          admittedly unconventional optimization does not contradict
 961          HTTP and works well with popular server software.  */
 962
 963       int found;
 964       ip_address ip;
 965       struct address_list *al;
 966
 967       if (ssl)
 968         /* Don't try to talk to two different SSL sites over the same
 969            secure connection!  (Besides, it's not clear that
 970            name-based virtual hosting is even possible with SSL.)  */
 971         return 0;
 972
 973       /* If pconn.socket's peer is one of the IP addresses HOST
 974          resolves to, pconn.socket is for all intents and purposes
 975          already talking to HOST.  */
 976
 977       if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
 978         {
 979           /* Can't get the peer's address -- something must be very
 980              wrong with the connection.  */
 981           invalidate_persistent ();
 982           return 0;
 983         }
 984       al = lookup_host (host, 0);
 985       if (!al)
 986         {
 987           *host_lookup_failed = 1;
 988           return 0;
 989         }
 990
 991       found = address_list_contains (al, &ip);
 992       address_list_release (al);
 993
 994       if (!found)
 995         return 0;
 996
 997       /* The persistent connection's peer address was found among the
 998          addresses HOST resolved to; therefore, pconn.sock is in fact
 999          already talking to HOST -- no need to reconnect.  */
1000     }
1001
1002   /* Finally, check whether the connection is still open.  This is
1003      important because most server implement a liberal (short) timeout
1004      on persistent connections.  Wget can of course always reconnect
1005      if the connection doesn't work out, but it's nicer to know in
1006      advance.  This test is a logical followup of the first test, but
1007      is "expensive" and therefore placed at the end of the list.  */
1008
1009   if (!test_socket_open (pconn.socket))
1010     {
1011       /* Oops, the socket is no longer open.  Now that we know that,
1012          let's invalidate the persistent connection before returning
1013          0.  */
1014       invalidate_persistent ();
1015       return 0;
1016     }
1017
1018   return 1;
1019 }
1020
/* The idea behind these two CLOSE macros is to distinguish between
   two cases: one when the job we've been doing is finished, and we
   want to close the connection and leave, and two when something is
   seriously wrong and we're closing the connection as part of
   cleanup.

   In case of keep_alive, CLOSE_FINISH should leave the connection
   open, while CLOSE_INVALIDATE should still close it.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".

   CLOSE_FINISH reads a variable named `keep_alive' that must be in
   scope at the point of use.  FD must be a modifiable lvalue; it is
   evaluated more than once.  */

/* Finish with FD.  With keep_alive set, nothing happens and FD stays
   usable.  Otherwise FD is disposed of -- through
   invalidate_persistent when it is the registered persistent socket,
   through fd_close when it is not -- and then reset to -1 so that
   the caller is never left holding a stale descriptor.  */
#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      if (pconn_active && (fd) == pconn.socket) \
        invalidate_persistent ();               \
      else                                      \
        fd_close (fd);                          \
      fd = -1;                                  \
    }                                           \
} while (0)
1048
/* Unconditionally dispose of FD: an ordinary descriptor goes through
   fd_close, while the registered persistent socket is dropped with
   invalidate_persistent.  Either way FD, which must be a modifiable
   lvalue, ends up as -1.  */
#define CLOSE_INVALIDATE(fd) do {               \
  if (!pconn_active || (fd) != pconn.socket)    \
    fd_close (fd);                              \
  else                                          \
    invalidate_persistent ();                   \
  (fd) = -1;                                    \
} while (0)
1056 \f
1057 struct http_stat
1058 {
1059   wgint len;                    /* received length */
1060   wgint contlen;                        /* expected length */
1061   wgint restval;                        /* the restart value */
1062   int res;                      /* the result of last read */
1063   char *newloc;                 /* new location (redirection) */
1064   char *remote_time;            /* remote time-stamp string */
1065   char *error;                  /* textual HTTP error */
1066   int statcode;                 /* status code */
1067   wgint rd_size;                        /* amount of data read from socket */
1068   double dltime;                /* time it took to download the data */
1069   const char *referer;          /* value of the referer header. */
1070   char **local_file;            /* local file. */
1071 };
1072
1073 static void
1074 free_hstat (struct http_stat *hs)
1075 {
1076   xfree_null (hs->newloc);
1077   xfree_null (hs->remote_time);
1078   xfree_null (hs->error);
1079
1080   /* Guard against being called twice. */
1081   hs->newloc = NULL;
1082   hs->remote_time = NULL;
1083   hs->error = NULL;
1084 }
1085
1086 static char *create_authorization_line (const char *, const char *,
1087                                         const char *, const char *,
1088                                         const char *, int *);
1089 static char *basic_authentication_encode (const char *, const char *);
1090 static int known_authentication_scheme_p (const char *, const char *);
1091
1092 time_t http_atotm (const char *);
1093
/* Evaluate to non-zero if the string LINE starts with
   STRING_CONSTANT, compared case-insensitively, and the match is
   followed either by whitespace or by the end of LINE.

   STRING_CONSTANT must be a string literal (or a char array), since
   its length is taken with sizeof.  LINE may be any expression of
   pointer-to-char type; it is evaluated more than once, so it must
   not have side effects.  */
#define BEGINS_WITH(line, string_constant)                              \
  (!strncasecmp ((line), string_constant, sizeof (string_constant) - 1) \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                   \
       || !(line)[sizeof (string_constant) - 1]))
1098
/* Set the "User-Agent" header of REQ from opt.useragent:

     - a non-empty opt.useragent is used as is (rel_none);
     - an unset (NULL) opt.useragent yields the default
       "Wget/<version_string>", built with aprintf and handed over
       with rel_value;
     - an empty opt.useragent makes this macro set no header.  */
#define SET_USER_AGENT(req) do {                                        \
  if (opt.useragent && *opt.useragent)                                  \
    request_set_header (req, "User-Agent", opt.useragent, rel_none);    \
  else if (!opt.useragent)                                              \
    request_set_header (req, "User-Agent",                              \
                        aprintf ("Wget/%s", version_string), rel_value); \
} while (0)
1106
/* Non-zero when any of the options that allow clobbering the file
   (opening it with "wb") is in effect.  Kept in one place so the
   test need not be repeated later.  #### This will require
   rework.  */
#define ALLOW_CLOBBER (opt.noclobber                    \
                       || opt.always_rest               \
                       || opt.timestamping              \
                       || opt.dirstruct                 \
                       || opt.output_document)
1112
1113 /* Retrieve a document through HTTP protocol.  It recognizes status
1114    code, and correctly handles redirections.  It closes the network
1115    socket.  If it receives an error from the functions below it, it
1116    will print it if there is enough information to do so (almost
1117    always), returning the error to the caller (i.e. http_loop).
1118
1119    Various HTTP parameters are stored to hs.
1120
1121    If PROXY is non-NULL, the connection will be made to the proxy
1122    server, and u->url will be requested.  */
1123 static uerr_t
1124 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1125 {
1126   struct request *req;
1127
1128   char *type;
1129   char *user, *passwd;
1130   char *proxyauth;
1131   int statcode;
1132   int write_error;
1133   wgint contlen, contrange;
1134   struct url *conn;
1135   FILE *fp;
1136
1137   int sock = -1;
1138   int flags;
1139
1140   /* Set to 1 when the authorization has failed permanently and should
1141      not be tried again. */
1142   int auth_finished = 0;
1143
1144   /* Whether NTLM authentication is used for this request. */
1145   int ntlm_seen = 0;
1146
1147   /* Whether our connection to the remote host is through SSL.  */
1148   int using_ssl = 0;
1149
1150   /* Whether a HEAD request will be issued (as opposed to GET or
1151      POST). */
1152   int head_only = *dt & HEAD_ONLY;
1153
1154   char *head;
1155   struct response *resp;
1156   char hdrval[256];
1157   char *message;
1158
1159   /* Whether this connection will be kept alive after the HTTP request
1160      is done. */
1161   int keep_alive;
1162
1163   /* Whether keep-alive should be inhibited.
1164
1165      RFC 2068 requests that 1.0 clients not send keep-alive requests
1166      to proxies.  This is because many 1.0 proxies do not interpret
1167      the Connection header and transfer it to the remote server,
1168      causing it to not close the connection and leave both the proxy
1169      and the client hanging.  */
1170   int inhibit_keep_alive =
1171     !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1172
1173   /* Headers sent when using POST. */
1174   wgint post_data_size = 0;
1175
1176   int host_lookup_failed = 0;
1177
1178 #ifdef HAVE_SSL
1179   if (u->scheme == SCHEME_HTTPS)
1180     {
1181       /* Initialize the SSL context.  After this has once been done,
1182          it becomes a no-op.  */
1183       if (!ssl_init ())
1184         {
1185           scheme_disable (SCHEME_HTTPS);
1186           logprintf (LOG_NOTQUIET,
1187                      _("Disabling SSL due to encountered errors.\n"));
1188           return SSLINITFAILED;
1189         }
1190     }
1191 #endif /* HAVE_SSL */
1192
1193   if (!head_only)
1194     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1195        know the local filename so we can save to it. */
1196     assert (*hs->local_file != NULL);
1197
1198   /* Initialize certain elements of struct http_stat.  */
1199   hs->len = 0;
1200   hs->contlen = -1;
1201   hs->res = -1;
1202   hs->newloc = NULL;
1203   hs->remote_time = NULL;
1204   hs->error = NULL;
1205
1206   conn = u;
1207
1208   /* Prepare the request to send. */
1209
1210   req = request_new ();
1211   {
1212     char *meth_arg;
1213     const char *meth = "GET";
1214     if (head_only)
1215       meth = "HEAD";
1216     else if (opt.post_file_name || opt.post_data)
1217       meth = "POST";
1218     /* Use the full path, i.e. one that includes the leading slash and
1219        the query string.  E.g. if u->path is "foo/bar" and u->query is
1220        "param=value", full_path will be "/foo/bar?param=value".  */
1221     if (proxy
1222 #ifdef HAVE_SSL
1223         /* When using SSL over proxy, CONNECT establishes a direct
1224            connection to the HTTPS server.  Therefore use the same
1225            argument as when talking to the server directly. */
1226         && u->scheme != SCHEME_HTTPS
1227 #endif
1228         )
1229       meth_arg = xstrdup (u->url);
1230     else
1231       meth_arg = url_full_path (u);
1232     request_set_method (req, meth, meth_arg);
1233   }
1234
1235   request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1236   if (*dt & SEND_NOCACHE)
1237     request_set_header (req, "Pragma", "no-cache", rel_none);
1238   if (hs->restval)
1239     request_set_header (req, "Range",
1240                         aprintf ("bytes=%s-",
1241                                  number_to_static_string (hs->restval)),
1242                         rel_value);
1243   SET_USER_AGENT (req);
1244   request_set_header (req, "Accept", "*/*", rel_none);
1245
1246   /* Find the username and password for authentication. */
1247   user = u->user;
1248   passwd = u->passwd;
1249   search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1250   user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1251   passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1252
1253   if (user && passwd)
1254     {
1255       /* We have the username and the password, but haven't tried
1256          any authorization yet.  Let's see if the "Basic" method
1257          works.  If not, we'll come back here and construct a
1258          proper authorization method with the right challenges.
1259
1260          If we didn't employ this kind of logic, every URL that
1261          requires authorization would have to be processed twice,
1262          which is very suboptimal and generates a bunch of false
1263          "unauthorized" errors in the server log.
1264
1265          #### But this logic also has a serious problem when used
1266          with stronger authentications: we *first* transmit the
1267          username and the password in clear text, and *then* attempt a
1268          stronger authentication scheme.  That cannot be right!  We
1269          are only fortunate that almost everyone still uses the
1270          `Basic' scheme anyway.
1271
1272          There should be an option to prevent this from happening, for
1273          those who use strong authentication schemes and value their
1274          passwords.  */
1275       request_set_header (req, "Authorization",
1276                           basic_authentication_encode (user, passwd),
1277                           rel_value);
1278     }
1279
1280   proxyauth = NULL;
1281   if (proxy)
1282     {
1283       char *proxy_user, *proxy_passwd;
1284       /* For normal username and password, URL components override
1285          command-line/wgetrc parameters.  With proxy
1286          authentication, it's the reverse, because proxy URLs are
1287          normally the "permanent" ones, so command-line args
1288          should take precedence.  */
1289       if (opt.proxy_user && opt.proxy_passwd)
1290         {
1291           proxy_user = opt.proxy_user;
1292           proxy_passwd = opt.proxy_passwd;
1293         }
1294       else
1295         {
1296           proxy_user = proxy->user;
1297           proxy_passwd = proxy->passwd;
1298         }
1299       /* #### This does not appear right.  Can't the proxy request,
1300          say, `Digest' authentication?  */
1301       if (proxy_user && proxy_passwd)
1302         proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1303
1304       /* If we're using a proxy, we will be connecting to the proxy
1305          server.  */
1306       conn = proxy;
1307
1308       /* Proxy authorization over SSL is handled below. */
1309 #ifdef HAVE_SSL
1310       if (u->scheme != SCHEME_HTTPS)
1311 #endif
1312         request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1313     }
1314
1315   {
1316     /* Whether we need to print the host header with braces around
1317        host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1318        usual "Host: symbolic-name:1234". */
1319     int squares = strchr (u->host, ':') != NULL;
1320     if (u->port == scheme_default_port (u->scheme))
1321       request_set_header (req, "Host",
1322                           aprintf (squares ? "[%s]" : "%s", u->host),
1323                           rel_value);
1324     else
1325       request_set_header (req, "Host",
1326                           aprintf (squares ? "[%s]:%d" : "%s:%d",
1327                                    u->host, u->port),
1328                           rel_value);
1329   }
1330
1331   if (!inhibit_keep_alive)
1332     request_set_header (req, "Connection", "Keep-Alive", rel_none);
1333
1334   if (opt.cookies)
1335     request_set_header (req, "Cookie",
1336                         cookie_header (wget_cookie_jar,
1337                                        u->host, u->port, u->path,
1338 #ifdef HAVE_SSL
1339                                        u->scheme == SCHEME_HTTPS
1340 #else
1341                                        0
1342 #endif
1343                                        ),
1344                         rel_value);
1345
1346   if (opt.post_data || opt.post_file_name)
1347     {
1348       request_set_header (req, "Content-Type",
1349                           "application/x-www-form-urlencoded", rel_none);
1350       if (opt.post_data)
1351         post_data_size = strlen (opt.post_data);
1352       else
1353         {
1354           post_data_size = file_size (opt.post_file_name);
1355           if (post_data_size == -1)
1356             {
1357               logprintf (LOG_NOTQUIET, _("POST data file missing: %s (%s)\n"),
1358                          opt.post_file_name, strerror (errno));
1359               post_data_size = 0;
1360             }
1361         }
1362       request_set_header (req, "Content-Length",
1363                           xstrdup (number_to_static_string (post_data_size)),
1364                           rel_value);
1365     }
1366
1367   /* Add the user headers. */
1368   if (opt.user_headers)
1369     {
1370       int i;
1371       for (i = 0; opt.user_headers[i]; i++)
1372         request_set_user_header (req, opt.user_headers[i]);
1373     }
1374
1375  retry_with_auth:
1376   /* We need to come back here when the initial attempt to retrieve
1377      without authorization header fails.  (Expected to happen at least
1378      for the Digest authorization scheme.)  */
1379
1380   keep_alive = 0;
1381
1382   /* Establish the connection.  */
1383
1384   if (!inhibit_keep_alive)
1385     {
1386       /* Look for a persistent connection to target host, unless a
1387          proxy is used.  The exception is when SSL is in use, in which
1388          case the proxy is nothing but a passthrough to the target
1389          host, registered as a connection to the latter.  */
1390       struct url *relevant = conn;
1391 #ifdef HAVE_SSL
1392       if (u->scheme == SCHEME_HTTPS)
1393         relevant = u;
1394 #endif
1395
1396       if (persistent_available_p (relevant->host, relevant->port,
1397 #ifdef HAVE_SSL
1398                                   relevant->scheme == SCHEME_HTTPS,
1399 #else
1400                                   0,
1401 #endif
1402                                   &host_lookup_failed))
1403         {
1404           sock = pconn.socket;
1405           using_ssl = pconn.ssl;
1406           logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1407                      escnonprint (pconn.host), pconn.port);
1408           DEBUGP (("Reusing fd %d.\n", sock));
1409           if (pconn.authorized)
1410             /* If the connection is already authorized, the "Basic"
1411                authorization added by code above is unnecessary and
1412                only hurts us.  */
1413             request_remove_header (req, "Authorization");
1414         }
1415     }
1416
1417   if (sock < 0)
1418     {
1419       /* In its current implementation, persistent_available_p will
1420          look up conn->host in some cases.  If that lookup failed, we
1421          don't need to bother with connect_to_host.  */
1422       if (host_lookup_failed)
1423         {
1424           request_free (req);
1425           return HOSTERR;
1426         }
1427
1428       sock = connect_to_host (conn->host, conn->port);
1429       if (sock == E_HOST)
1430         {
1431           request_free (req);
1432           return HOSTERR;
1433         }
1434       else if (sock < 0)
1435         {
1436           request_free (req);
1437           return (retryable_socket_connect_error (errno)
1438                   ? CONERROR : CONIMPOSSIBLE);
1439         }
1440
1441 #ifdef HAVE_SSL
1442       if (proxy && u->scheme == SCHEME_HTTPS)
1443         {
1444           /* When requesting SSL URLs through proxies, use the
1445              CONNECT method to request passthrough.  */
1446           struct request *connreq = request_new ();
1447           request_set_method (connreq, "CONNECT",
1448                               aprintf ("%s:%d", u->host, u->port));
1449           SET_USER_AGENT (connreq);
1450           if (proxyauth)
1451             {
1452               request_set_header (connreq, "Proxy-Authorization",
1453                                   proxyauth, rel_value);
1454               /* Now that PROXYAUTH is part of the CONNECT request,
1455                  zero it out so we don't send proxy authorization with
1456                  the regular request below.  */
1457               proxyauth = NULL;
1458             }
1459           /* Examples in rfc2817 use the Host header in CONNECT
1460              requests.  I don't see how that gains anything, given
1461              that the contents of Host would be exactly the same as
1462              the contents of CONNECT.  */
1463
1464           write_error = request_send (connreq, sock);
1465           request_free (connreq);
1466           if (write_error < 0)
1467             {
1468               logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1469                          strerror (errno));
1470               CLOSE_INVALIDATE (sock);
1471               return WRITEFAILED;
1472             }
1473
1474           head = read_http_response_head (sock);
1475           if (!head)
1476             {
1477               logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1478                          strerror (errno));
1479               CLOSE_INVALIDATE (sock);
1480               return HERR;
1481             }
1482           message = NULL;
1483           if (!*head)
1484             {
1485               xfree (head);
1486               goto failed_tunnel;
1487             }
1488           DEBUGP (("proxy responded with: [%s]\n", head));
1489
1490           resp = resp_new (head);
1491           statcode = resp_status (resp, &message);
1492           resp_free (resp);
1493           xfree (head);
1494           if (statcode != 200)
1495             {
1496             failed_tunnel:
1497               logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1498                          message ? escnonprint (message) : "?");
1499               xfree_null (message);
1500               return CONSSLERR;
1501             }
1502           xfree_null (message);
1503
1504           /* SOCK is now *really* connected to u->host, so update CONN
1505              to reflect this.  That way register_persistent will
1506              register SOCK as being connected to u->host:u->port.  */
1507           conn = u;
1508         }
1509
1510       if (conn->scheme == SCHEME_HTTPS)
1511         {
1512           if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1513             {
1514               fd_close (sock);
1515               return CONSSLERR;
1516             }
1517           using_ssl = 1;
1518         }
1519 #endif /* HAVE_SSL */
1520     }
1521
1522   /* Send the request to server.  */
1523   write_error = request_send (req, sock);
1524
1525   if (write_error >= 0)
1526     {
1527       if (opt.post_data)
1528         {
1529           DEBUGP (("[POST data: %s]\n", opt.post_data));
1530           write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1531         }
1532       else if (opt.post_file_name && post_data_size != 0)
1533         write_error = post_file (sock, opt.post_file_name, post_data_size);
1534     }
1535
1536   if (write_error < 0)
1537     {
1538       logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1539                  strerror (errno));
1540       CLOSE_INVALIDATE (sock);
1541       request_free (req);
1542       return WRITEFAILED;
1543     }
1544   logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1545              proxy ? "Proxy" : "HTTP");
1546   contlen = -1;
1547   contrange = 0;
1548   *dt &= ~RETROKF;
1549
1550   head = read_http_response_head (sock);
1551   if (!head)
1552     {
1553       if (errno == 0)
1554         {
1555           logputs (LOG_NOTQUIET, _("No data received.\n"));
1556           CLOSE_INVALIDATE (sock);
1557           request_free (req);
1558           return HEOF;
1559         }
1560       else
1561         {
1562           logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1563                      strerror (errno));
1564           CLOSE_INVALIDATE (sock);
1565           request_free (req);
1566           return HERR;
1567         }
1568     }
1569   DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1570
1571   resp = resp_new (head);
1572
1573   /* Check for status line.  */
1574   message = NULL;
1575   statcode = resp_status (resp, &message);
1576   if (!opt.server_response)
1577     logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1578                message ? escnonprint (message) : "");
1579   else
1580     {
1581       logprintf (LOG_VERBOSE, "\n");
1582       print_server_response (resp, "  ");
1583     }
1584
1585   if (!opt.ignore_length
1586       && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1587     {
1588       wgint parsed;
1589       errno = 0;
1590       parsed = str_to_wgint (hdrval, NULL, 10);
1591       if (parsed == WGINT_MAX && errno == ERANGE)
1592         /* Out of range.
1593            #### If Content-Length is out of range, it most likely
1594            means that the file is larger than 2G and that we're
1595            compiled without LFS.  In that case we should probably
1596            refuse to even attempt to download the file.  */
1597         contlen = -1;
1598       else
1599         contlen = parsed;
1600     }
1601
1602   /* Check for keep-alive related responses. */
1603   if (!inhibit_keep_alive && contlen != -1)
1604     {
1605       if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1606         keep_alive = 1;
1607       else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1608         {
1609           if (0 == strcasecmp (hdrval, "Keep-Alive"))
1610             keep_alive = 1;
1611         }
1612     }
1613   if (keep_alive)
1614     /* The server has promised that it will not close the connection
1615        when we're done.  This means that we can register it.  */
1616     register_persistent (conn->host, conn->port, sock, using_ssl);
1617
1618   if (statcode == HTTP_STATUS_UNAUTHORIZED)
1619     {
1620       /* Authorization is required.  */
1621       if (keep_alive && !head_only && skip_short_body (sock, contlen))
1622         CLOSE_FINISH (sock);
1623       else
1624         CLOSE_INVALIDATE (sock);
1625       pconn.authorized = 0;
1626       if (!auth_finished && (user && passwd))
1627         {
1628           /* IIS sends multiple copies of WWW-Authenticate, one with
1629              the value "negotiate", and other(s) with data.  Loop over
1630              all the occurrences and pick the one we recognize.  */
1631           int wapos;
1632           const char *wabeg, *waend;
1633           char *www_authenticate = NULL;
1634           for (wapos = 0;
1635                (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1636                                             &wabeg, &waend)) != -1;
1637                ++wapos)
1638             if (known_authentication_scheme_p (wabeg, waend))
1639               {
1640                 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1641                 break;
1642               }
1643
1644           if (!www_authenticate)
1645             /* If the authentication header is missing or
1646                unrecognized, there's no sense in retrying.  */
1647             logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1648           else if (BEGINS_WITH (www_authenticate, "Basic"))
1649             /* If the authentication scheme is "Basic", which we send
1650                by default, there's no sense in retrying either.  (This
1651                should be changed when we stop sending "Basic" data by
1652                default.)  */
1653             ;
1654           else
1655             {
1656               char *pth;
1657               pth = url_full_path (u);
1658               request_set_header (req, "Authorization",
1659                                   create_authorization_line (www_authenticate,
1660                                                              user, passwd,
1661                                                              request_method (req),
1662                                                              pth,
1663                                                              &auth_finished),
1664                                   rel_value);
1665               if (BEGINS_WITH (www_authenticate, "NTLM"))
1666                 ntlm_seen = 1;
1667               xfree (pth);
1668               goto retry_with_auth;
1669             }
1670         }
1671       logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1672       request_free (req);
1673       return AUTHFAILED;
1674     }
1675   else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1676     {
1677       /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1678       if (ntlm_seen)
1679         pconn.authorized = 1;
1680     }
1681   request_free (req);
1682
1683   hs->statcode = statcode;
1684   if (statcode == -1)
1685     hs->error = xstrdup (_("Malformed status line"));
1686   else if (!*message)
1687     hs->error = xstrdup (_("(no description)"));
1688   else
1689     hs->error = xstrdup (message);
1690   xfree (message);
1691
1692   type = resp_header_strdup (resp, "Content-Type");
1693   if (type)
1694     {
1695       char *tmp = strchr (type, ';');
1696       if (tmp)
1697         {
1698           while (tmp > type && ISSPACE (tmp[-1]))
1699             --tmp;
1700           *tmp = '\0';
1701         }
1702     }
1703   hs->newloc = resp_header_strdup (resp, "Location");
1704   hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1705
1706   /* Handle (possibly multiple instances of) the Set-Cookie header. */
1707   if (opt.cookies)
1708     {
1709       char *pth = NULL;
1710       int scpos;
1711       const char *scbeg, *scend;
1712       /* The jar should have been created by now. */
1713       assert (wget_cookie_jar != NULL);
1714       for (scpos = 0;
1715            (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1716                                         &scbeg, &scend)) != -1;
1717            ++scpos)
1718         {
1719           char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1720           if (pth == NULL)
1721             {
1722               /* u->path doesn't begin with /, which cookies.c expects. */
1723               pth = (char *) alloca (1 + strlen (u->path) + 1);
1724               pth[0] = '/';
1725               strcpy (pth + 1, u->path);
1726             }
1727           cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth,
1728                                     set_cookie);
1729         }
1730     }
1731
1732   if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1733     {
1734       wgint first_byte_pos, last_byte_pos, entity_length;
1735       if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1736                                &entity_length))
1737         contrange = first_byte_pos;
1738     }
1739   resp_free (resp);
1740
1741   /* 20x responses are counted among successful by default.  */
1742   if (H_20X (statcode))
1743     *dt |= RETROKF;
1744
1745   /* Return if redirected.  */
1746   if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1747     {
1748       /* RFC2068 says that in case of the 300 (multiple choices)
1749          response, the server can output a preferred URL through
1750          `Location' header; otherwise, the request should be treated
1751          like GET.  So, if the location is set, it will be a
1752          redirection; otherwise, just proceed normally.  */
1753       if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1754         *dt |= RETROKF;
1755       else
1756         {
1757           logprintf (LOG_VERBOSE,
1758                      _("Location: %s%s\n"),
1759                      hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
1760                      hs->newloc ? _(" [following]") : "");
1761           if (keep_alive && !head_only && skip_short_body (sock, contlen))
1762             CLOSE_FINISH (sock);
1763           else
1764             CLOSE_INVALIDATE (sock);
1765           xfree_null (type);
1766           return NEWLOCATION;
1767         }
1768     }
1769
1770   /* If content-type is not given, assume text/html.  This is because
1771      of the multitude of broken CGI's that "forget" to generate the
1772      content-type.  */
1773   if (!type ||
1774         0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1775         0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1776     *dt |= TEXTHTML;
1777   else
1778     *dt &= ~TEXTHTML;
1779
1780   if (opt.html_extension && (*dt & TEXTHTML))
1781     /* -E / --html-extension / html_extension = on was specified, and this is a
1782        text/html file.  If some case-insensitive variation on ".htm[l]" isn't
1783        already the file's suffix, tack on ".html". */
1784     {
1785       char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
1786
1787       if (last_period_in_local_filename == NULL
1788           || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1789                || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1790         {
1791           int local_filename_len = strlen (*hs->local_file);
1792           /* Resize the local file, allowing for ".html" preceded by
1793              optional ".NUMBER".  */
1794           *hs->local_file = xrealloc (*hs->local_file,
1795                                       local_filename_len + 24 + sizeof (".html"));
1796           strcpy(*hs->local_file + local_filename_len, ".html");
1797           /* If clobbering is not allowed and the file, as named,
1798              exists, tack on ".NUMBER.html" instead. */
1799           if (!ALLOW_CLOBBER)
1800             {
1801               int ext_num = 1;
1802               do
1803                 sprintf (*hs->local_file + local_filename_len,
1804                          ".%d.html", ext_num++);
1805               while (file_exists_p (*hs->local_file));
1806             }
1807           *dt |= ADDED_HTML_EXTENSION;
1808         }
1809     }
1810
1811   if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
1812     {
1813       /* If `-c' is in use and the file has been fully downloaded (or
1814          the remote file has shrunk), Wget effectively requests bytes
1815          after the end of file and the server response with 416.  */
1816       logputs (LOG_VERBOSE, _("\
1817 \n    The file is already fully retrieved; nothing to do.\n\n"));
1818       /* In case the caller inspects. */
1819       hs->len = contlen;
1820       hs->res = 0;
1821       /* Mark as successfully retrieved. */
1822       *dt |= RETROKF;
1823       xfree_null (type);
1824       CLOSE_INVALIDATE (sock);  /* would be CLOSE_FINISH, but there
1825                                    might be more bytes in the body. */
1826       return RETRUNNEEDED;
1827     }
1828   if ((contrange != 0 && contrange != hs->restval)
1829       || (H_PARTIAL (statcode) && !contrange))
1830     {
1831       /* The Range request was somehow misunderstood by the server.
1832          Bail out.  */
1833       xfree_null (type);
1834       CLOSE_INVALIDATE (sock);
1835       return RANGEERR;
1836     }
1837   hs->contlen = contlen + contrange;
1838
1839   if (opt.verbose)
1840     {
1841       if (*dt & RETROKF)
1842         {
1843           /* No need to print this output if the body won't be
1844              downloaded at all, or if the original server response is
1845              printed.  */
1846           logputs (LOG_VERBOSE, _("Length: "));
1847           if (contlen != -1)
1848             {
1849               logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange));
1850               if (contlen + contrange >= 1024)
1851                 logprintf (LOG_VERBOSE, " (%s)",
1852                            human_readable (contlen + contrange));
1853               if (contrange)
1854                 {
1855                   if (contlen >= 1024)
1856                     logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
1857                                with_thousand_seps (contlen),
1858                                human_readable (contlen));
1859                   else
1860                     logprintf (LOG_VERBOSE, _(", %s remaining"),
1861                                with_thousand_seps (contlen));
1862                 }
1863             }
1864           else
1865             logputs (LOG_VERBOSE,
1866                      opt.ignore_length ? _("ignored") : _("unspecified"));
1867           if (type)
1868             logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
1869           else
1870             logputs (LOG_VERBOSE, "\n");
1871         }
1872     }
1873   xfree_null (type);
1874   type = NULL;                  /* We don't need it any more.  */
1875
1876   /* Return if we have no intention of further downloading.  */
1877   if (!(*dt & RETROKF) || head_only)
1878     {
1879       /* In case the caller cares to look...  */
1880       hs->len = 0;
1881       hs->res = 0;
1882       xfree_null (type);
1883       /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
1884          servers not to send body in response to a HEAD request.  If
1885          you encounter such a server (more likely a broken CGI), use
1886          `--no-http-keep-alive'.  */
1887       CLOSE_FINISH (sock);
1888       return RETRFINISHED;
1889     }
1890
1891   /* Open the local file.  */
1892   if (!output_stream)
1893     {
1894       mkalldirs (*hs->local_file);
1895       if (opt.backups)
1896         rotate_backups (*hs->local_file);
1897       if (hs->restval)
1898         fp = fopen (*hs->local_file, "ab");
1899       else if (ALLOW_CLOBBER)
1900         fp = fopen (*hs->local_file, "wb");
1901       else
1902         {
1903           fp = fopen_excl (*hs->local_file, 1);
1904           if (!fp && errno == EEXIST)
1905             {
1906               /* We cannot just invent a new name and use it (which is
1907                  what functions like unique_create typically do)
1908                  because we told the user we'd use this name.
1909                  Instead, return and retry the download.  */
1910               logprintf (LOG_NOTQUIET,
1911                          _("%s has sprung into existence.\n"),
1912                          *hs->local_file);
1913               CLOSE_INVALIDATE (sock);
1914               return FOPEN_EXCL_ERR;
1915             }
1916         }
1917       if (!fp)
1918         {
1919           logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1920           CLOSE_INVALIDATE (sock);
1921           return FOPENERR;
1922         }
1923     }
1924   else
1925     fp = output_stream;
1926
1927   /* #### This confuses the timestamping code that checks for file
1928      size.  Maybe we should save some additional information?  */
1929   if (opt.save_headers)
1930     fwrite (head, 1, strlen (head), fp);
1931
1932   /* Now we no longer need to store the response header. */
1933   xfree (head);
1934
1935   /* Download the request body.  */
1936   flags = 0;
1937   if (keep_alive)
1938     flags |= rb_read_exactly;
1939   if (hs->restval > 0 && contrange == 0)
1940     /* If the server ignored our range request, instruct fd_read_body
1941        to skip the first RESTVAL bytes of body.  */
1942     flags |= rb_skip_startpos;
1943   hs->len = hs->restval;
1944   hs->rd_size = 0;
1945   hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
1946                           hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
1947                           flags);
1948
1949   if (hs->res >= 0)
1950     CLOSE_FINISH (sock);
1951   else
1952     CLOSE_INVALIDATE (sock);
1953
1954   {
1955     /* Close or flush the file.  We have to be careful to check for
1956        error here.  Checking the result of fwrite() is not enough --
1957        errors could go unnoticed!  */
1958     int flush_res;
1959     if (!output_stream)
1960       flush_res = fclose (fp);
1961     else
1962       flush_res = fflush (fp);
1963     if (flush_res == EOF)
1964       hs->res = -2;
1965   }
1966   if (hs->res == -2)
1967     return FWRITEERR;
1968   return RETRFINISHED;
1969 }
1970
1971 /* The genuine HTTP loop!  This is the part where the retrieval is
1972    retried, and retried, and retried, and...  */
1973 uerr_t
1974 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1975            int *dt, struct url *proxy)
1976 {
1977   int count;
1978   int use_ts, got_head = 0;     /* time-stamping info */
1979   char *filename_plus_orig_suffix;
1980   char *local_filename = NULL;
1981   char *tms, *locf, *tmrate;
1982   uerr_t err;
1983   time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
1984   wgint local_size = 0;         /* the size of the local file */
1985   size_t filename_len;
1986   struct http_stat hstat;       /* HTTP status */
1987   struct_stat st;
1988   char *dummy = NULL;
1989
1990   /* This used to be done in main(), but it's a better idea to do it
1991      here so that we don't go through the hoops if we're just using
1992      FTP or whatever. */
1993   if (opt.cookies)
1994     {
1995       if (!wget_cookie_jar)
1996         wget_cookie_jar = cookie_jar_new ();
1997       if (opt.cookies_input && !cookies_loaded_p)
1998         {
1999           cookie_jar_load (wget_cookie_jar, opt.cookies_input);
2000           cookies_loaded_p = 1;
2001         }
2002     }
2003
2004   *newloc = NULL;
2005
2006   /* Warn on (likely bogus) wildcard usage in HTTP.  */
2007   if (has_wildcards_p (u->path))
2008     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2009
2010   xzero (hstat);
2011
2012   /* Determine the local filename.  */
2013   if (local_file && *local_file)
2014     hstat.local_file = local_file;
2015   else if (local_file && !opt.output_document)
2016     {
2017       *local_file = url_file_name (u);
2018       hstat.local_file = local_file;
2019     }
2020   else
2021     {
2022       dummy = url_file_name (u);
2023       hstat.local_file = &dummy;
2024       /* be honest about where we will save the file */
2025       if (local_file && opt.output_document)
2026         *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2027     }
2028
2029   if (!opt.output_document)
2030     locf = *hstat.local_file;
2031   else
2032     locf = opt.output_document;
2033
2034   hstat.referer = referer;
2035
2036   filename_len = strlen (*hstat.local_file);
2037   filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
2038
2039   if (opt.noclobber && file_exists_p (*hstat.local_file))
2040     {
2041       /* If opt.noclobber is turned on and file already exists, do not
2042          retrieve the file */
2043       logprintf (LOG_VERBOSE, _("\
2044 File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
2045       /* If the file is there, we suppose it's retrieved OK.  */
2046       *dt |= RETROKF;
2047
2048       /* #### Bogusness alert.  */
2049       /* If its suffix is "html" or "htm" or similar, assume text/html.  */
2050       if (has_html_suffix_p (*hstat.local_file))
2051         *dt |= TEXTHTML;
2052
2053       xfree_null (dummy);
2054       return RETROK;
2055     }
2056
2057   use_ts = 0;
2058   if (opt.timestamping)
2059     {
2060       int local_dot_orig_file_exists = 0;
2061
2062       if (opt.backup_converted)
2063         /* If -K is specified, we'll act on the assumption that it was specified
2064            last time these files were downloaded as well, and instead of just
2065            comparing local file X against server file X, we'll compare local
2066            file X.orig (if extant, else X) against server file X.  If -K
2067            _wasn't_ specified last time, or the server contains files called
2068            *.orig, -N will be back to not operating correctly with -k. */
2069         {
2070           /* Would a single s[n]printf() call be faster?  --dan
2071
2072              Definitely not.  sprintf() is horribly slow.  It's a
2073              different question whether the difference between the two
2074              affects a program.  Usually I'd say "no", but at one
2075              point I profiled Wget, and found that a measurable and
2076              non-negligible amount of time was lost calling sprintf()
2077              in url.c.  Replacing sprintf with inline calls to
2078              strcpy() and number_to_string() made a difference.
2079              --hniksic */
2080           memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
2081           memcpy (filename_plus_orig_suffix + filename_len,
2082                   ".orig", sizeof (".orig"));
2083
2084           /* Try to stat() the .orig file. */
2085           if (stat (filename_plus_orig_suffix, &st) == 0)
2086             {
2087               local_dot_orig_file_exists = 1;
2088               local_filename = filename_plus_orig_suffix;
2089             }
2090         }
2091
2092       if (!local_dot_orig_file_exists)
2093         /* Couldn't stat() <file>.orig, so try to stat() <file>. */
2094         if (stat (*hstat.local_file, &st) == 0)
2095           local_filename = *hstat.local_file;
2096
2097       if (local_filename != NULL)
2098         /* There was a local file, so we'll check later to see if the version
2099            the server has is the same version we already have, allowing us to
2100            skip a download. */
2101         {
2102           use_ts = 1;
2103           tml = st.st_mtime;
2104 #ifdef WINDOWS
2105           /* Modification time granularity is 2 seconds for Windows, so
2106              increase local time by 1 second for later comparison. */
2107           tml++;
2108 #endif
2109           local_size = st.st_size;
2110           got_head = 0;
2111         }
2112     }
2113   /* Reset the counter.  */
2114   count = 0;
2115   *dt = 0;
2116   /* THE loop */
2117   do
2118     {
2119       /* Increment the pass counter.  */
2120       ++count;
2121       sleep_between_retrievals (count);
2122       /* Get the current time string.  */
2123       tms = time_str (NULL);
2124       /* Print fetch message, if opt.verbose.  */
2125       if (opt.verbose)
2126         {
2127           char *hurl = url_string (u, 1);
2128           char tmp[256];
2129           strcpy (tmp, "        ");
2130           if (count > 1)
2131             sprintf (tmp, _("(try:%2d)"), count);
2132           logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
2133                      tms, hurl, tmp, locf);
2134 #ifdef WINDOWS
2135           ws_changetitle (hurl);
2136 #endif
2137           xfree (hurl);
2138         }
2139
2140       /* Default document type is empty.  However, if spider mode is
2141          on or time-stamping is employed, HEAD_ONLY commands is
2142          encoded within *dt.  */
2143       if (opt.spider || (use_ts && !got_head))
2144         *dt |= HEAD_ONLY;
2145       else
2146         *dt &= ~HEAD_ONLY;
2147
2148       /* Decide whether or not to restart.  */
2149       if (opt.always_rest
2150           && stat (locf, &st) == 0
2151           && S_ISREG (st.st_mode))
2152         /* When -c is used, continue from on-disk size.  (Can't use
2153            hstat.len even if count>1 because we don't want a failed
2154            first attempt to clobber existing data.)  */
2155         hstat.restval = st.st_size;
2156       else if (count > 1)
2157         /* otherwise, continue where the previous try left off */
2158         hstat.restval = hstat.len;
2159       else
2160         hstat.restval = 0;
2161
2162       /* Decide whether to send the no-cache directive.  We send it in
2163          two cases:
2164            a) we're using a proxy, and we're past our first retrieval.
2165               Some proxies are notorious for caching incomplete data, so
2166               we require a fresh get.
2167            b) caching is explicitly inhibited. */
2168       if ((proxy && count > 1)  /* a */
2169           || !opt.allow_cache   /* b */
2170           )
2171         *dt |= SEND_NOCACHE;
2172       else
2173         *dt &= ~SEND_NOCACHE;
2174
2175       /* Try fetching the document, or at least its head.  */
2176       err = gethttp (u, &hstat, dt, proxy);
2177
2178       /* It's unfortunate that wget determines the local filename before finding
2179          out the Content-Type of the file.  Barring a major restructuring of the
2180          code, we need to re-set locf here, since gethttp() may have xrealloc()d
2181          *hstat.local_file to tack on ".html". */
2182       if (!opt.output_document)
2183         locf = *hstat.local_file;
2184
2185       /* Time?  */
2186       tms = time_str (NULL);
2187       /* Get the new location (with or without the redirection).  */
2188       if (hstat.newloc)
2189         *newloc = xstrdup (hstat.newloc);
2190       switch (err)
2191         {
2192         case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2193         case CONERROR: case READERR: case WRITEFAILED:
2194         case RANGEERR: case FOPEN_EXCL_ERR:
2195           /* Non-fatal errors continue executing the loop, which will
2196              bring them to "while" statement at the end, to judge
2197              whether the number of tries was exceeded.  */
2198           free_hstat (&hstat);
2199           printwhat (count, opt.ntry);
2200           if (err == FOPEN_EXCL_ERR)
2201             {
2202               /* Re-determine the file name. */
2203               if (local_file && *local_file)
2204                 {
2205                   xfree (*local_file);
2206                   *local_file = url_file_name (u);
2207                   hstat.local_file = local_file;
2208                 }
2209               else
2210                 {
2211                   xfree (dummy);
2212                   dummy = url_file_name (u);
2213                   hstat.local_file = &dummy;
2214                 }
2215               /* be honest about where we will save the file */
2216               if (local_file && opt.output_document)
2217                 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2218               if (!opt.output_document)
2219                 locf = *hstat.local_file;
2220               else
2221                 locf = opt.output_document;
2222             }
2223           continue;
2224         case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2225         case SSLINITFAILED: case CONTNOTSUPPORTED:
2226           /* Fatal errors just return from the function.  */
2227           free_hstat (&hstat);
2228           xfree_null (dummy);
2229           return err;
2230         case FWRITEERR: case FOPENERR:
2231           /* Another fatal error.  */
2232           logputs (LOG_VERBOSE, "\n");
2233           logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2234                      *hstat.local_file, strerror (errno));
2235           free_hstat (&hstat);
2236           xfree_null (dummy);
2237           return err;
2238         case CONSSLERR:
2239           /* Another fatal error.  */
2240           logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2241           free_hstat (&hstat);
2242           xfree_null (dummy);
2243           return err;
2244         case NEWLOCATION:
2245           /* Return the new location to the caller.  */
2246           if (!hstat.newloc)
2247             {
2248               logprintf (LOG_NOTQUIET,
2249                          _("ERROR: Redirection (%d) without location.\n"),
2250                          hstat.statcode);
2251               free_hstat (&hstat);
2252               xfree_null (dummy);
2253               return WRONGCODE;
2254             }
2255           free_hstat (&hstat);
2256           xfree_null (dummy);
2257           return NEWLOCATION;
2258         case RETRUNNEEDED:
2259           /* The file was already fully retrieved. */
2260           free_hstat (&hstat);
2261           xfree_null (dummy);
2262           return RETROK;
2263         case RETRFINISHED:
2264           /* Deal with you later.  */
2265           break;
2266         default:
2267           /* All possibilities should have been exhausted.  */
2268           abort ();
2269         }
2270       if (!(*dt & RETROKF))
2271         {
2272           if (!opt.verbose)
2273             {
2274               /* #### Ugly ugly ugly! */
2275               char *hurl = url_string (u, 1);
2276               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2277               xfree (hurl);
2278             }
2279           logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2280                      tms, hstat.statcode, escnonprint (hstat.error));
2281           logputs (LOG_VERBOSE, "\n");
2282           free_hstat (&hstat);
2283           xfree_null (dummy);
2284           return WRONGCODE;
2285         }
2286
2287       /* Did we get the time-stamp?  */
2288       if (!got_head)
2289         {
2290           if (opt.timestamping && !hstat.remote_time)
2291             {
2292               logputs (LOG_NOTQUIET, _("\
2293 Last-modified header missing -- time-stamps turned off.\n"));
2294             }
2295           else if (hstat.remote_time)
2296             {
2297               /* Convert the date-string into struct tm.  */
2298               tmr = http_atotm (hstat.remote_time);
2299               if (tmr == (time_t) (-1))
2300                 logputs (LOG_VERBOSE, _("\
2301 Last-modified header invalid -- time-stamp ignored.\n"));
2302             }
2303         }
2304
2305       /* The time-stamping section.  */
2306       if (use_ts)
2307         {
2308           got_head = 1;
2309           *dt &= ~HEAD_ONLY;
2310           use_ts = 0;           /* no more time-stamping */
2311           count = 0;            /* the retrieve count for HEAD is
2312                                    reset */
2313           if (hstat.remote_time && tmr != (time_t) (-1))
2314             {
2315               /* Now time-stamping can be used validly.  Time-stamping
2316                  means that if the sizes of the local and remote file
2317                  match, and local file is newer than the remote file,
2318                  it will not be retrieved.  Otherwise, the normal
2319                  download procedure is resumed.  */
2320               if (tml >= tmr &&
2321                   (hstat.contlen == -1 || local_size == hstat.contlen))
2322                 {
2323                   logprintf (LOG_VERBOSE, _("\
2324 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2325                              local_filename);
2326                   free_hstat (&hstat);
2327                   xfree_null (dummy);
2328                   return RETROK;
2329                 }
2330               else if (tml >= tmr)
2331                 logprintf (LOG_VERBOSE, _("\
2332 The sizes do not match (local %s) -- retrieving.\n"),
2333                            number_to_static_string (local_size));
2334               else
2335                 logputs (LOG_VERBOSE,
2336                          _("Remote file is newer, retrieving.\n"));
2337             }
2338           free_hstat (&hstat);
2339           continue;
2340         }
2341       if ((tmr != (time_t) (-1))
2342           && !opt.spider
2343           && ((hstat.len == hstat.contlen) ||
2344               ((hstat.res == 0) &&
2345                ((hstat.contlen == -1) ||
2346                 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2347         {
2348           /* #### This code repeats in http.c and ftp.c.  Move it to a
2349              function!  */
2350           const char *fl = NULL;
2351           if (opt.output_document)
2352             {
2353               if (output_stream_regular)
2354                 fl = opt.output_document;
2355             }
2356           else
2357             fl = *hstat.local_file;
2358           if (fl)
2359             touch (fl, tmr);
2360         }
2361       /* End of time-stamping section.  */
2362
2363       if (opt.spider)
2364         {
2365           logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
2366                      escnonprint (hstat.error));
2367           xfree_null (dummy);
2368           return RETROK;
2369         }
2370
2371       tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
2372
2373       if (hstat.len == hstat.contlen)
2374         {
2375           if (*dt & RETROKF)
2376             {
2377               logprintf (LOG_VERBOSE,
2378                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2379                          tms, tmrate, locf,
2380                          number_to_static_string (hstat.len),
2381                          number_to_static_string (hstat.contlen));
2382               logprintf (LOG_NONVERBOSE,
2383                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2384                          tms, u->url,
2385                          number_to_static_string (hstat.len),
2386                          number_to_static_string (hstat.contlen),
2387                          locf, count);
2388             }
2389           ++opt.numurls;
2390           total_downloaded_bytes += hstat.len;
2391
2392           /* Remember that we downloaded the file for later ".orig" code. */
2393           if (*dt & ADDED_HTML_EXTENSION)
2394             downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2395           else
2396             downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2397
2398           free_hstat (&hstat);
2399           xfree_null (dummy);
2400           return RETROK;
2401         }
2402       else if (hstat.res == 0) /* No read error */
2403         {
2404           if (hstat.contlen == -1)  /* We don't know how much we were supposed
2405                                        to get, so assume we succeeded. */
2406             {
2407               if (*dt & RETROKF)
2408                 {
2409                   logprintf (LOG_VERBOSE,
2410                              _("%s (%s) - `%s' saved [%s]\n\n"),
2411                              tms, tmrate, locf,
2412                              number_to_static_string (hstat.len));
2413                   logprintf (LOG_NONVERBOSE,
2414                              "%s URL:%s [%s] -> \"%s\" [%d]\n",
2415                              tms, u->url, number_to_static_string (hstat.len),
2416                              locf, count);
2417                 }
2418               ++opt.numurls;
2419               total_downloaded_bytes += hstat.len;
2420
2421               /* Remember that we downloaded the file for later ".orig" code. */
2422               if (*dt & ADDED_HTML_EXTENSION)
2423                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2424               else
2425                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2426
2427               free_hstat (&hstat);
2428               xfree_null (dummy);
2429               return RETROK;
2430             }
2431           else if (hstat.len < hstat.contlen) /* meaning we lost the
2432                                                  connection too soon */
2433             {
2434               logprintf (LOG_VERBOSE,
2435                          _("%s (%s) - Connection closed at byte %s. "),
2436                          tms, tmrate, number_to_static_string (hstat.len));
2437               printwhat (count, opt.ntry);
2438               free_hstat (&hstat);
2439               continue;
2440             }
2441           else if (!opt.kill_longer) /* meaning we got more than expected */
2442             {
2443               logprintf (LOG_VERBOSE,
2444                          _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2445                          tms, tmrate, locf,
2446                          number_to_static_string (hstat.len),
2447                          number_to_static_string (hstat.contlen));
2448               logprintf (LOG_NONVERBOSE,
2449                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2450                          tms, u->url,
2451                          number_to_static_string (hstat.len),
2452                          number_to_static_string (hstat.contlen),
2453                          locf, count);
2454               ++opt.numurls;
2455               total_downloaded_bytes += hstat.len;
2456
2457               /* Remember that we downloaded the file for later ".orig" code. */
2458               if (*dt & ADDED_HTML_EXTENSION)
2459                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2460               else
2461                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2462
2463               free_hstat (&hstat);
2464               xfree_null (dummy);
2465               return RETROK;
2466             }
2467           else                  /* the same, but not accepted */
2468             {
2469               logprintf (LOG_VERBOSE,
2470                          _("%s (%s) - Connection closed at byte %s/%s. "),
2471                          tms, tmrate,
2472                          number_to_static_string (hstat.len),
2473                          number_to_static_string (hstat.contlen));
2474               printwhat (count, opt.ntry);
2475               free_hstat (&hstat);
2476               continue;
2477             }
2478         }
2479       else                      /* now hstat.res can only be -1 */
2480         {
2481           if (hstat.contlen == -1)
2482             {
2483               logprintf (LOG_VERBOSE,
2484                          _("%s (%s) - Read error at byte %s (%s)."),
2485                          tms, tmrate, number_to_static_string (hstat.len),
2486                          strerror (errno));
2487               printwhat (count, opt.ntry);
2488               free_hstat (&hstat);
2489               continue;
2490             }
2491           else                  /* hstat.res == -1 and contlen is given */
2492             {
2493               logprintf (LOG_VERBOSE,
2494                          _("%s (%s) - Read error at byte %s/%s (%s). "),
2495                          tms, tmrate,
2496                          number_to_static_string (hstat.len),
2497                          number_to_static_string (hstat.contlen),
2498                          strerror (errno));
2499               printwhat (count, opt.ntry);
2500               free_hstat (&hstat);
2501               continue;
2502             }
2503         }
2504       /* not reached */
2505     }
2506   while (!opt.ntry || (count < opt.ntry));
2507   return TRYLIMEXC;
2508 }
2509 \f
/* Convert the broken-down time in T to a time_t, treating the fields
   of T as UTC rather than as local time.

   mktime performs the same conversion but assumes that struct tm,
   the "broken-down" form of time, is expressed in the local time
   zone.  mktime_from_utc therefore calls mktime knowing that the
   result is displaced by the local zone offset o, measures that
   offset, and removes it.

   The offset is measured by applying gmtime to the first mktime
   result and then applying mktime to the resulting broken-down time.
   The difference between the two mktime results is the offset, which
   is subtracted from the first result to yield the returned calendar
   time.

   tm_isdst is set to 0 before the second mktime call to force a
   consistent offset (the non-DST offset), since tm and tm+o might be
   on opposite sides of a DST change.

   Some implementations of mktime return -1 for the nonexistent
   local-time hour at the beginning of DST.  In this event,
   mktime(tm - 1hr) + 3600 is used instead.

   Schematically
     mktime(tm)   --> t+o
     gmtime(t+o)  --> tm+o
     mktime(tm+o) --> t+2o
     (t+o) - ((t+2o) - (t+o)) = t

   Returns the computed time_t, or -1 when mktime fails even after
   the one-hour retry or when gmtime cannot represent the
   intermediate value.  T may be modified: it is passed to mktime and
   its tm_hour is decremented in the retry path.

   Note that glibc contains a function of the same purpose named
   `timegm' (reverse of gmtime).  But obviously, it is not universally
   available, and unfortunately it is not straightforwardly
   extractable for use here.  Perhaps configure should detect timegm
   and use it where available.

   Contributed by Roger Beeman <beeman@cisco.com>, with the help of
   Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
   Further improved by Roger with assistance from Edward J. Sabol
   based on input by Jamie Zawinski.  */

static time_t
mktime_from_utc (struct tm *t)
{
  time_t tl, tb;
  struct tm *tg;

  /* First pass: T interpreted as local time, giving t+o.  */
  tl = mktime (t);
  if (tl == -1)
    {
      /* Possibly the nonexistent hour at the start of DST; retry one
         hour earlier and compensate afterwards.  */
      t->tm_hour--;
      tl = mktime (t);
      if (tl == -1)
        return -1; /* can't deal with output from strptime */
      tl += 3600;
    }

  /* gmtime is allowed to fail and return NULL; dereferencing the
     result without a check would crash instead of reporting a
     conversion failure.  */
  tg = gmtime (&tl);
  if (!tg)
    return -1;

  /* Second pass: measure the offset using the non-DST rule.  */
  tg->tm_isdst = 0;
  tb = mktime (tg);
  if (tb == -1)
    {
      tg->tm_hour--;
      tb = mktime (tg);
      if (tb == -1)
        return -1; /* can't deal with output from gmtime */
      tb += 3600;
    }

  /* (t+o) - ((t+2o) - (t+o)) = t  */
  return (tl - (tb - tl));
}
2577
2578 /* Check whether the result of strptime() indicates success.
2579    strptime() returns the pointer to how far it got to in the string.
2580    The processing has been successful if the string is at `GMT' or
2581    `+X', or at the end of the string.
2582
2583    In extended regexp parlance, the function returns 1 if P matches
2584    "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (which strptime
2585    can return) is considered a failure and 0 is returned.  */
2586 static int
2587 check_end (const char *p)
2588 {
2589   if (!p)
2590     return 0;
2591   while (ISSPACE (*p))
2592     ++p;
2593   if (!*p
2594       || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2595       || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2596     return 1;
2597   else
2598     return 0;
2599 }
2600
/* Parse the date/time text in TIME_STRING and return it as seconds
   since the Epoch.

   The accepted layouts are the three that RFC2616 permits HTTP
   servers to send -- RFC1123-date, RFC850-date and asctime-date --
   plus the layout used by the Set-Cookie header.  Any timezone
   designation is ignored; the time is expected to be GMT.

   Returns the time_t value, or -1 when no layout matches.

   The parser is a sequence of strptime calls, one per layout, which
   makes it stricter than would be ideal.  strptime does tolerate
   abbreviated month names, single-digit days and the like, but given
   how many ways a time can be written, a truly forgiving HTTP time
   parser would disregard week days altogether and look only at the
   year, month, day, hour, minute and second fields.  Accepting ISO
   8601 directly would be a welcome example.

   Free and public-domain alternatives were examined and rejected.
   getdate was large and unwieldy and reportedly carried copyright
   concerns.  Dr. Marcus Hennecke's atotm(), shipped with phttpd, is
   excellent but is not assigned to the FSF and so cannot be used.
   Hence strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: the Solaris strptime manual says %n and %t match white
     space, but those directives are not available everywhere.  A
     plain ` ' in the format is used instead to mean "skip all WS";
     that worked under every strptime implementation tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  int idx = 0;

  while (idx < countof (time_formats))
    {
      struct tm parsed;
      const char *stop;

      /* Certain strptime versions recompute the date from whatever
         is already stored in struct tm.  Clear it so that stack
         garbage cannot affect the outcome.  */
      xzero (parsed);

      /* Solaris strptime does not recognize English month names in
         non-English locales; this is worked around by not setting
         the LC_TIME category.  Temporarily switching the locale to C
         around the call would also work, but is slow and messy.  GNU
         strptime is unaffected because it accepts English month
         names in addition to the local ones.  */
      stop = strptime (time_string, time_formats[idx], &parsed);
      if (check_end (stop))
        return mktime_from_utc (&parsed);

      ++idx;
    }

  /* No layout matched.  */
  return -1;
}
2670 \f
/* Authorization support: We support three authorization schemes:

   * `Basic' scheme, which sends the USER:PASSWORD string encoded in
   base64;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>, which
   answers the server's challenge with the proper MD5 digests.

   * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
   Stenberg for libcurl.  Like digest, NTLM is based on a
   challenge-response mechanism, but unlike digest, it is non-standard
   (it authenticates TCP connections rather than requests),
   undocumented and Microsoft-specific.  */
2684
/* Create the authentication header contents for the `Basic' scheme.
   This is done by encoding the string "USER:PASS" to base64 and
   prepending the string "Basic " in front of it.

   USER and PASSWD must be non-NULL strings.  The result is the
   string produced by concat_strings.

   The scratch buffers are taken from the heap rather than from the
   stack with alloca: their size is dictated by the credentials,
   which are supplied from outside the program, and an oversized
   alloca request overflows the stack with undefined results.  The
   length is kept in a size_t so that it cannot be truncated.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  char *t1, *t2, *res;
  size_t len1 = strlen (user) + 1 + strlen (passwd);

  /* T1 = "USER:PASS" */
  t1 = xmalloc (len1 + 1);
  sprintf (t1, "%s:%s", user, passwd);

  /* T2 = base64 (T1) */
  t2 = xmalloc (BASE64_LENGTH (len1) + 1);
  base64_encode (t1, len1, t2);

  res = concat_strings ("Basic ", t2, (char *) 0);

  xfree_null (t2);
  xfree_null (t1);
  return res;
}
2703
/* Advance the pointer lvalue X past every leading character for which
   ISSPACE is true.  X is expanded more than once, so it must not
   carry side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
2708
2709 #ifdef ENABLE_DIGEST
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of a field in such a header.  If the field is the
   one named by ATTR_NAME ("realm", "opaque", and "nonce" are used by
   the current digest authorization code), its quoted value is copied
   into a fresh string stored in the (char *) variable pointed to by
   RET; any string previously held there is freed first.

   Returns:
     > 0  the number of bytes of AU consumed, up to and including the
          closing quote of the value;
       0  the field at AU is not ATTR_NAME;
     < 0  the field is ATTR_NAME but is malformed (missing `=',
          missing opening or closing quote, or premature end).

   The name is compared case-insensitively, since auth-param names
   are case-insensitive.  A field whose name merely begins with
   ATTR_NAME (for instance "realmx" when looking for "realm") is a
   different field and yields 0 rather than being reported as
   malformed.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  const char *cp = au;
  const char *ep;
  size_t name_len = strlen (attr_name);

  if (strncasecmp (cp, attr_name, name_len) != 0)
    return 0;
  cp += name_len;

  if (!*cp)
    return -1;
  /* The name must end here; otherwise ATTR_NAME was only a prefix of
     some longer, unrelated attribute name.  */
  if (*cp != '=' && !ISSPACE (*cp))
    return 0;

  SKIP_WS (cp);
  if (*cp != '=')
    return -1;
  if (!*++cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '\"')
    return -1;
  if (!*++cp)
    return -1;

  /* Find the closing quote.  */
  for (ep = cp; *ep && *ep != '\"'; ep++)
    ;
  if (!*ep)
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (cp, ep);
  return ep - au + 1;
}
2748
2749 /* Dump the hexadecimal representation of HASH to BUF.  HASH should be
2750    an array of 16 bytes containing the hash keys, and BUF should be a
2751    buffer of 33 writable characters (32 for hex digits plus one for
2752    zero termination).  */
2753 static void
2754 dump_hash (unsigned char *buf, const unsigned char *hash)
2755 {
2756   int i;
2757
2758   for (i = 0; i < MD5_HASHLEN; i++, hash++)
2759     {
2760       *buf++ = XNUM_TO_digit (*hash >> 4);
2761       *buf++ = XNUM_TO_digit (*hash & 0xf);
2762     }
2763   *buf = '\0';
2764 }
2765
2766 /* Take the line apart to find the challenge, and compose a digest
2767    authorization header.  See RFC2069 section 2.1.2.  */
2768 static char *
2769 digest_authentication_encode (const char *au, const char *user,
2770                               const char *passwd, const char *method,
2771                               const char *path)
2772 {
2773   static char *realm, *opaque, *nonce;
2774   static struct {
2775     const char *name;
2776     char **variable;
2777   } options[] = {
2778     { "realm", &realm },
2779     { "opaque", &opaque },
2780     { "nonce", &nonce }
2781   };
2782   char *res;
2783
2784   realm = opaque = nonce = NULL;
2785
2786   au += 6;                      /* skip over `Digest' */
2787   while (*au)
2788     {
2789       int i;
2790
2791       SKIP_WS (au);
2792       for (i = 0; i < countof (options); i++)
2793         {
2794           int skip = extract_header_attr (au, options[i].name,
2795                                           options[i].variable);
2796           if (skip < 0)
2797             {
2798               xfree_null (realm);
2799               xfree_null (opaque);
2800               xfree_null (nonce);
2801               return NULL;
2802             }
2803           else if (skip)
2804             {
2805               au += skip;
2806               break;
2807             }
2808         }
2809       if (i == countof (options))
2810         {
2811           while (*au && *au != '=')
2812             au++;
2813           if (*au && *++au)
2814             {
2815               SKIP_WS (au);
2816               if (*au == '\"')
2817                 {
2818                   au++;
2819                   while (*au && *au != '\"')
2820                     au++;
2821                   if (*au)
2822                     au++;
2823                 }
2824             }
2825         }
2826       while (*au && *au != ',')
2827         au++;
2828       if (*au)
2829         au++;
2830     }
2831   if (!realm || !nonce || !user || !passwd || !path || !method)
2832     {
2833       xfree_null (realm);
2834       xfree_null (opaque);
2835       xfree_null (nonce);
2836       return NULL;
2837     }
2838
2839   /* Calculate the digest value.  */
2840   {
2841     ALLOCA_MD5_CONTEXT (ctx);
2842     unsigned char hash[MD5_HASHLEN];
2843     unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2844     unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2845
2846     /* A1BUF = H(user ":" realm ":" password) */
2847     gen_md5_init (ctx);
2848     gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2849     gen_md5_update ((unsigned char *)":", 1, ctx);
2850     gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2851     gen_md5_update ((unsigned char *)":", 1, ctx);
2852     gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2853     gen_md5_finish (ctx, hash);
2854     dump_hash (a1buf, hash);
2855
2856     /* A2BUF = H(method ":" path) */
2857     gen_md5_init (ctx);
2858     gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2859     gen_md5_update ((unsigned char *)":", 1, ctx);
2860     gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2861     gen_md5_finish (ctx, hash);
2862     dump_hash (a2buf, hash);
2863
2864     /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2865     gen_md5_init (ctx);
2866     gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2867     gen_md5_update ((unsigned char *)":", 1, ctx);
2868     gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2869     gen_md5_update ((unsigned char *)":", 1, ctx);
2870     gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2871     gen_md5_finish (ctx, hash);
2872     dump_hash (response_digest, hash);
2873
2874     res = xmalloc (strlen (user)
2875                    + strlen (user)
2876                    + strlen (realm)
2877                    + strlen (nonce)
2878                    + strlen (path)
2879                    + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2880                    + (opaque ? strlen (opaque) : 0)
2881                    + 128);
2882     sprintf (res, "Digest \
2883 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2884              user, realm, nonce, path, response_digest);
2885     if (opaque)
2886       {
2887         char *p = res + strlen (res);
2888         strcat (p, ", opaque=\"");
2889         strcat (p, opaque);
2890         strcat (p, "\"");
2891       }
2892   }
2893   return res;
2894 }
2895 #endif /* ENABLE_DIGEST */
2896
/* Length of a string literal.  sizeof counts the terminating \0, so
   one is subtracted.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* True when the characters in [b, e) begin with the string literal
   given as first argument and that literal is followed either by the
   end of the range or by whitespace.  Letter case is ignored.  */
#define STARTS(literal, b, e)                           \
  ((e) - (b) >= STRSIZE (literal)                       \
   && !strncasecmp (b, literal, STRSIZE (literal))      \
   && ((e) - (b) == STRSIZE (literal)                   \
       || ISSPACE (b[STRSIZE (literal)])))

/* Return 1 if the header text in [HDRBEG, HDREND) names an
   authentication scheme this build can handle, 0 otherwise.  `Basic'
   is always recognized; `Digest' and `NTLM' only when the
   corresponding support is compiled in.  */
static int
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return 1;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return 1;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return 1;
#endif
  return 0;
}

#undef STARTS
2924
/* Build the value of the HTTP authorization request header that
   answers the `WWW-Authenticate' challenge AU.  The scheme named at
   the start of AU selects the encoder: `Basic' always, and `Digest'
   and `NTLM' when support for them is compiled in.

   USER and PASSWD are the credentials; METHOD and PATH are passed on
   to the digest encoder.  *FINISHED is set to 1 for Basic and
   Digest, and for NTLM when ntlm_input rejects the challenge (in
   which case NULL is returned); otherwise NTLM leaves it to
   ntlm_output.

   Calling this with a scheme other than the ones above aborts the
   program.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, int *finished)
{
  /* Callers pass only known schemes, so the first letter alone is
     enough to tell them apart.  */
  const int initial = TOUPPER (*au);

  if (initial == 'B')           /* Basic */
    {
      *finished = 1;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (initial == 'D')           /* Digest */
    {
      *finished = 1;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (initial == 'N')           /* NTLM */
    {
      if (ntlm_input (&pconn.ntlm, au))
        return ntlm_output (&pconn.ntlm, user, passwd, finished);
      *finished = 1;
      return NULL;
    }
#endif

  /* We shouldn't get here -- this function should be called only
     with values approved by known_authentication_scheme_p.  */
  abort ();
}
2962 \f
2963 void
2964 save_cookies (void)
2965 {
2966   if (wget_cookie_jar)
2967     cookie_jar_save (wget_cookie_jar, opt.cookies_output);
2968 }
2969
2970 void
2971 http_cleanup (void)
2972 {
2973   xfree_null (pconn.host);
2974   if (wget_cookie_jar)
2975     cookie_jar_delete (wget_cookie_jar);
2976 }