sjero.net Git - wget/blob - src/http.c

   1 /* HTTP support.
   2    Copyright (C) 2003 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9  (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_STRING_H
  36 # include <string.h>
  37 #else
  38 # include <strings.h>
  39 #endif
  40 #ifdef HAVE_UNISTD_H
  41 # include <unistd.h>
  42 #endif
  43 #include <assert.h>
  44 #include <errno.h>
  45 #if TIME_WITH_SYS_TIME
  46 # include <sys/time.h>
  47 # include <time.h>
  48 #else
  49 # if HAVE_SYS_TIME_H
  50 #  include <sys/time.h>
  51 # else
  52 #  include <time.h>
  53 # endif
  54 #endif
  55 #ifndef errno
  56 extern int errno;
  57 #endif
  58
  59 #include "wget.h"
  60 #include "utils.h"
  61 #include "url.h"
  62 #include "host.h"
  63 #include "retr.h"
  64 #include "connect.h"
  65 #include "netrc.h"
  66 #ifdef HAVE_SSL
  67 # include "gen_sslfunc.h"
  68 #endif /* HAVE_SSL */
  69 #include "cookies.h"
  70 #ifdef USE_DIGEST
  71 # include "gen-md5.h"
  72 #endif
  73 #include "convert.h"
  74
/* Objects declared extern here; their definitions are not in this
   section of the file.  */
extern char *version_string;
extern LARGE_INT total_downloaded_bytes;

extern FILE *output_stream;
extern int output_stream_regular;

/* The smaller of X and Y, unless a MIN macro has already been
   provided.  Each argument can be evaluated twice, so do not pass
   expressions with side effects.  */
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
  84
  85 \f
/* cookies_loaded_p is private to this file and starts out as zero;
   presumably it records whether the cookie file has been read --
   confirm at its use sites.  wget_cookie_jar has external linkage.  */
static int cookies_loaded_p;
struct cookie_jar *wget_cookie_jar;
  88
/* MIME type strings for HTML and XHTML documents.  */
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"

/* Some status code validation macros: */
/* H_20X is true for any status in [200, 300).  H_PARTIAL is true only
   for 206.  H_REDIRECTED is true for 301, 302, 303 and 307; note that
   300 and 304 are not treated as redirections.  */
#define H_20X(x)        (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY           \
                         || (x) == HTTP_STATUS_MOVED_TEMPORARILY        \
                         || (x) == HTTP_STATUS_SEE_OTHER                \
                         || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
  99
/* HTTP/1.0 status codes from RFC1945, provided for reference.  The
   entries marked "from HTTP/1.1" are not part of RFC1945.  */
/* Successful 2xx.  */
#define HTTP_STATUS_OK                  200
#define HTTP_STATUS_CREATED             201
#define HTTP_STATUS_ACCEPTED            202
#define HTTP_STATUS_NO_CONTENT          204
#define HTTP_STATUS_PARTIAL_CONTENTS    206

/* Redirection 3xx.  */
#define HTTP_STATUS_MULTIPLE_CHOICES    300
#define HTTP_STATUS_MOVED_PERMANENTLY   301
#define HTTP_STATUS_MOVED_TEMPORARILY   302
#define HTTP_STATUS_SEE_OTHER           303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED        304
#define HTTP_STATUS_TEMPORARY_REDIRECT  307 /* from HTTP/1.1 */

/* Client error 4xx.  */
#define HTTP_STATUS_BAD_REQUEST         400
#define HTTP_STATUS_UNAUTHORIZED        401
#define HTTP_STATUS_FORBIDDEN           403
#define HTTP_STATUS_NOT_FOUND           404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416

/* Server errors 5xx.  */
#define HTTP_STATUS_INTERNAL            500
#define HTTP_STATUS_NOT_IMPLEMENTED     501
#define HTTP_STATUS_BAD_GATEWAY         502
#define HTTP_STATUS_UNAVAILABLE         503
 128 \f
/* Release policy of a request header: which of its NAME and VALUE
   strings release_header frees (neither, NAME only, VALUE only, or
   both).  */
enum rp {
  rel_none, rel_name, rel_value, rel_both
};
 132
/* An HTTP request under construction: the pieces of the request line
   plus a growable array of headers.  */
struct request {
  const char *method;           /* never freed by request_free */
  char *arg;                    /* freed by request_free */

  struct request_header {
    char *name, *value;
    enum rp release_policy;     /* which of NAME/VALUE release_header frees */
  } *headers;
  int hcount, hcapacity;        /* headers in use / slots allocated */
};
 143
 144 /* Create a new, empty request.  At least request_set_method must be
 145    called before the request can be used.  */
 146
 147 static struct request *
 148 request_new ()
 149 {
 150   struct request *req = xnew0 (struct request);
 151   req->hcapacity = 8;
 152   req->headers = xnew_array (struct request_header, req->hcapacity);
 153   return req;
 154 }
 155
 156 /* Set the request's method and its arguments.  METH should be a
 157    literal string (or it should outlive the request) because it will
 158    not be freed.  ARG will be freed by request_free.  */
 159
 160 static void
 161 request_set_method (struct request *req, const char *meth, char *arg)
 162 {
 163   req->method = meth;
 164   req->arg = arg;
 165 }
 166
 167 /* Return the method string passed with the last call to
 168    request_set_method.  */
 169
 170 static const char *
 171 request_method (const struct request *req)
 172 {
 173   return req->method;
 174 }
 175
 176 /* Free one header according to the release policy specified with
 177    request_set_header.  */
 178
 179 static void
 180 release_header (struct request_header *hdr)
 181 {
 182   switch (hdr->release_policy)
 183     {
 184     case rel_none:
 185       break;
 186     case rel_name:
 187       xfree (hdr->name);
 188       break;
 189     case rel_value:
 190       xfree (hdr->value);
 191       break;
 192     case rel_both:
 193       xfree (hdr->name);
 194       xfree (hdr->value);
 195       break;
 196     }
 197 }
 198
 199 /* Set the request named NAME to VALUE.  Specifically, this means that
 200    a "NAME: VALUE\r\n" header line will be used in the request.  If a
 201    header with the same name previously existed in the request, its
 202    value will be replaced by this one.
 203
 204    RELEASE_POLICY determines whether NAME and VALUE should be released
 205    (freed) with request_free.  Allowed values are:
 206
 207     - rel_none     - don't free NAME or VALUE
 208     - rel_name     - free NAME when done
 209     - rel_value    - free VALUE when done
 210     - rel_both     - free both NAME and VALUE when done
 211
 212    Setting release policy is useful when arguments come from different
 213    sources.  For example:
 214
 215      // Don't free literal strings!
 216      request_set_header (req, "Pragma", "no-cache", rel_none);
 217
 218      // Don't free a global variable, we'll need it later.
 219      request_set_header (req, "Referer", opt.referer, rel_none);
 220
 221      // Value freshly allocated, free it when done.
 222      request_set_header (req, "Range", aprintf ("bytes=%ld-", hs->restval),
 223                          rel_value);
 224    */
 225
 226 static void
 227 request_set_header (struct request *req, char *name, char *value,
 228                     enum rp release_policy)
 229 {
 230   struct request_header *hdr;
 231   int i;
 232   if (!value)
 233     return;
 234   for (i = 0; i < req->hcount; i++)
 235     {
 236       hdr = &req->headers[i];
 237       if (0 == strcasecmp (name, hdr->name))
 238         {
 239           /* Replace existing header. */
 240           release_header (hdr);
 241           hdr->name = name;
 242           hdr->value = value;
 243           hdr->release_policy = release_policy;
 244           return;
 245         }
 246     }
 247
 248   /* Install new header. */
 249
 250   if (req->hcount >= req->hcount)
 251     {
 252       req->hcapacity <<= 1;
 253       req->headers = xrealloc (req->headers,
 254                                req->hcapacity * sizeof (struct request_header));
 255     }
 256   hdr = &req->headers[req->hcount++];
 257   hdr->name = name;
 258   hdr->value = value;
 259   hdr->release_policy = release_policy;
 260 }
 261
 262 /* Like request_set_header, but sets the whole header line, as
 263    provided by the user using the `--header' option.  For example,
 264    request_set_user_header (req, "Foo: bar") works just like
 265    request_set_header (req, "Foo", "bar").  */
 266
 267 static void
 268 request_set_user_header (struct request *req, const char *header)
 269 {
 270   char *name;
 271   const char *p = strchr (header, ':');
 272   if (!p)
 273     return;
 274   BOUNDED_TO_ALLOCA (header, p, name);
 275   ++p;
 276   while (ISSPACE (*p))
 277     ++p;
 278   request_set_header (req, xstrdup (name), (char *) p, rel_name);
 279 }
 280
 281 #define APPEND(p, str) do {                     \
 282   int A_len = strlen (str);                     \
 283   memcpy (p, str, A_len);                       \
 284   p += A_len;                                   \
 285 } while (0)
 286
 287 /* Construct the request and write it to FD using fd_write.  */
 288
 289 static int
 290 request_send (const struct request *req, int fd)
 291 {
 292   char *request_string, *p;
 293   int i, size, write_error;
 294
 295   /* Count the request size. */
 296   size = 0;
 297
 298   /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
 299   size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
 300
 301   for (i = 0; i < req->hcount; i++)
 302     {
 303       struct request_header *hdr = &req->headers[i];
 304       /* NAME ": " VALUE "\r\n" */
 305       size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
 306     }
 307
 308   /* "\r\n\0" */
 309   size += 3;
 310
 311   p = request_string = alloca_array (char, size);
 312
 313   /* Generate the request. */
 314
 315   APPEND (p, req->method); *p++ = ' ';
 316   APPEND (p, req->arg);    *p++ = ' ';
 317   memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
 318
 319   for (i = 0; i < req->hcount; i++)
 320     {
 321       struct request_header *hdr = &req->headers[i];
 322       APPEND (p, hdr->name);
 323       *p++ = ':', *p++ = ' ';
 324       APPEND (p, hdr->value);
 325       *p++ = '\r', *p++ = '\n';
 326     }
 327
 328   *p++ = '\r', *p++ = '\n', *p++ = '\0';
 329   assert (p - request_string == size);
 330
 331 #undef APPEND
 332
 333   DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
 334
 335   /* Send the request to the server. */
 336
 337   write_error = fd_write (fd, request_string, size - 1, -1);
 338   if (write_error < 0)
 339     logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
 340                strerror (errno));
 341   return write_error;
 342 }
 343
 344 /* Release the resources used by REQ. */
 345
 346 static void
 347 request_free (struct request *req)
 348 {
 349   int i;
 350   xfree_null (req->arg);
 351   for (i = 0; i < req->hcount; i++)
 352     release_header (&req->headers[i]);
 353   xfree_null (req->headers);
 354   xfree (req);
 355 }
 356
 357 /* Send the contents of FILE_NAME to SOCK/SSL.  Make sure that exactly
 358    PROMISED_SIZE bytes are sent over the wire -- if the file is
 359    longer, read only that much; if the file is shorter, report an error.  */
 360
 361 static int
 362 post_file (int sock, const char *file_name, long promised_size)
 363 {
 364   static char chunk[8192];
 365   long written = 0;
 366   int write_error;
 367   FILE *fp;
 368
 369   DEBUGP (("[writing POST file %s ... ", file_name));
 370
 371   fp = fopen (file_name, "rb");
 372   if (!fp)
 373     return -1;
 374   while (!feof (fp) && written < promised_size)
 375     {
 376       int towrite;
 377       int length = fread (chunk, 1, sizeof (chunk), fp);
 378       if (length == 0)
 379         break;
 380       towrite = MIN (promised_size - written, length);
 381       write_error = fd_write (sock, chunk, towrite, -1);
 382       if (write_error < 0)
 383         {
 384           fclose (fp);
 385           return -1;
 386         }
 387       written += towrite;
 388     }
 389   fclose (fp);
 390
 391   /* If we've written less than was promised, report a (probably
 392      nonsensical) error rather than break the promise.  */
 393   if (written < promised_size)
 394     {
 395       errno = EINVAL;
 396       return -1;
 397     }
 398
 399   assert (written == promised_size);
 400   DEBUGP (("done]\n"));
 401   return 0;
 402 }
 403 \f
 404 static const char *
 405 head_terminator (const char *hunk, int oldlen, int peeklen)
 406 {
 407   const char *start, *end;
 408
 409   /* If at first peek, verify whether HUNK starts with "HTTP".  If
 410      not, this is a HTTP/0.9 request and we must bail out without
 411      reading anything.  */
 412   if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
 413     return hunk;
 414
 415   if (oldlen < 4)
 416     start = hunk;
 417   else
 418     start = hunk + oldlen - 4;
 419   end = hunk + oldlen + peeklen;
 420
 421   for (; start < end - 1; start++)
 422     if (*start == '\n')
 423       {
 424         if (start < end - 2
 425             && start[1] == '\r'
 426             && start[2] == '\n')
 427           return start + 3;
 428         if (start[1] == '\n')
 429           return start + 2;
 430       }
 431   return NULL;
 432 }
 433
 434 /* Read the HTTP request head from FD and return it.  The error
 435    conditions are the same as with fd_read_hunk.
 436
 437    To support HTTP/0.9 responses, this function tries to make sure
 438    that the data begins with "HTTP".  If this is not the case, no data
 439    is read and an empty request is returned, so that the remaining
 440    data can be treated as body.  */
 441
 442 static char *
 443 fd_read_http_head (int fd)
 444 {
 445   return fd_read_hunk (fd, head_terminator, 512);
 446 }
 447
/* A parsed view of an HTTP response head.  The structure only points
   into the head text; response_free frees HEADERS and the structure,
   not DATA.  */
struct response {
  /* The response data. */
  const char *data;

  /* The array of pointers that indicate where each header starts.
     For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     The headers are located like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
     ^                   ^                             ^          ^
     headers[0]          headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response (the
     status line), headers[1] points to the end of the status line and
     the beginning of the first header, etc.  A continuation line
     belongs to the header it continues.

     response_new terminates the array with a NULL pointer and leaves
     HEADERS itself NULL for a headerless (HTTP/0.9) response.  */

  const char **headers;
};
 472
 473 /* Create a new response object from the text of the HTTP response,
 474    available in HEAD.  That text is automatically split into
 475    constituent header lines for fast retrieval using
 476    response_header_*.  */
 477
 478 static struct response *
 479 response_new (const char *head)
 480 {
 481   const char *hdr;
 482   int count, size;
 483
 484   struct response *resp = xnew0 (struct response);
 485   resp->data = head;
 486
 487   if (*head == '\0')
 488     {
 489       /* Empty head means that we're dealing with a headerless
 490          (HTTP/0.9) response.  In that case, don't set HEADERS at
 491          all.  */
 492       return resp;
 493     }
 494
 495   /* Split HEAD into header lines, so that response_header_* functions
 496      don't need to do this over and over again.  */
 497
 498   size = count = 0;
 499   hdr = head;
 500   while (1)
 501     {
 502       DO_REALLOC (resp->headers, size, count + 1, const char *);
 503       resp->headers[count++] = hdr;
 504
 505       /* Break upon encountering an empty line. */
 506       if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
 507         break;
 508
 509       /* Find the end of HDR, including continuations. */
 510       do
 511         {
 512           const char *end = strchr (hdr, '\n');
 513           if (end)
 514             hdr = end + 1;
 515           else
 516             hdr += strlen (hdr);
 517         }
 518       while (*hdr == ' ' || *hdr == '\t');
 519     }
 520   DO_REALLOC (resp->headers, size, count + 1, const char *);
 521   resp->headers[count++] = NULL;
 522
 523   return resp;
 524 }
 525
 526 /* Locate the header named NAME in the request data.  If found, set
 527    *BEGPTR to its starting, and *ENDPTR to its ending position, and
 528    return 1.  Otherwise return 0.
 529
 530    This function is used as a building block for response_header_copy
 531    and response_header_strdup.  */
 532
 533 static int
 534 response_header_bounds (const struct response *resp, const char *name,
 535                         const char **begptr, const char **endptr)
 536 {
 537   int i;
 538   const char **headers = resp->headers;
 539   int name_len;
 540
 541   if (!headers || !headers[1])
 542     return 0;
 543
 544   name_len = strlen (name);
 545
 546   for (i = 1; headers[i + 1]; i++)
 547     {
 548       const char *b = headers[i];
 549       const char *e = headers[i + 1];
 550       if (e - b > name_len
 551           && b[name_len] == ':'
 552           && 0 == strncasecmp (b, name, name_len))
 553         {
 554           b += name_len + 1;
 555           while (b < e && ISSPACE (*b))
 556             ++b;
 557           while (b < e && ISSPACE (e[-1]))
 558             --e;
 559           *begptr = b;
 560           *endptr = e;
 561           return 1;
 562         }
 563     }
 564   return 0;
 565 }
 566
 567 /* Copy the response header named NAME to buffer BUF, no longer than
 568    BUFSIZE (BUFSIZE includes the terminating 0).  If the header
 569    exists, 1 is returned, otherwise 0.  If there should be no limit on
 570    the size of the header, use response_header_strdup instead.
 571
 572    If BUFSIZE is 0, no data is copied, but the boolean indication of
 573    whether the header is present is still returned.  */
 574
 575 static int
 576 response_header_copy (const struct response *resp, const char *name,
 577                       char *buf, int bufsize)
 578 {
 579   const char *b, *e;
 580   if (!response_header_bounds (resp, name, &b, &e))
 581     return 0;
 582   if (bufsize)
 583     {
 584       int len = MIN (e - b, bufsize);
 585       strncpy (buf, b, len);
 586       buf[len] = '\0';
 587     }
 588   return 1;
 589 }
 590
 591 /* Return the value of header named NAME in RESP, allocated with
 592    malloc.  If such a header does not exist in RESP, return NULL.  */
 593
 594 static char *
 595 response_header_strdup (const struct response *resp, const char *name)
 596 {
 597   const char *b, *e;
 598   if (!response_header_bounds (resp, name, &b, &e))
 599     return NULL;
 600   return strdupdelim (b, e);
 601 }
 602
 603 /* Parse the HTTP status line, which is of format:
 604
 605    HTTP-Version SP Status-Code SP Reason-Phrase
 606
 607    The function returns the status-code, or -1 if the status line
 608    appears malformed.  The pointer to "reason-phrase" message is
 609    returned in *MESSAGE.  */
 610
 611 static int
 612 response_status (const struct response *resp, char **message)
 613 {
 614   int status;
 615   const char *p, *end;
 616
 617   if (!resp->headers)
 618     {
 619       /* For a HTTP/0.9 response, assume status 200. */
 620       if (message)
 621         *message = xstrdup (_("No headers, assuming HTTP/0.9"));
 622       return 200;
 623     }
 624
 625   p = resp->headers[0];
 626   end = resp->headers[1];
 627
 628   if (!end)
 629     return -1;
 630
 631   /* "HTTP" */
 632   if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
 633     return -1;
 634   p += 4;
 635
 636   /* Match the HTTP version.  This is optional because Gnutella
 637      servers have been reported to not specify HTTP version.  */
 638   if (p < end && *p == '/')
 639     {
 640       ++p;
 641       while (p < end && ISDIGIT (*p))
 642         ++p;
 643       if (p < end && *p == '.')
 644         ++p;
 645       while (p < end && ISDIGIT (*p))
 646         ++p;
 647     }
 648
 649   while (p < end && ISSPACE (*p))
 650     ++p;
 651   if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
 652     return -1;
 653
 654   status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
 655   p += 3;
 656
 657   if (message)
 658     {
 659       while (p < end && ISSPACE (*p))
 660         ++p;
 661       while (p < end && ISSPACE (end[-1]))
 662         --end;
 663       *message = strdupdelim (p, end);
 664     }
 665
 666   return status;
 667 }
 668
 669 /* Release the resources used by RESP.  */
 670
 671 static void
 672 response_free (struct response *resp)
 673 {
 674   xfree_null (resp->headers);
 675   xfree (resp);
 676 }
 677
 678 /* Print [b, e) to the log, omitting the trailing CRLF.  */
 679
 680 static void
 681 print_server_response_1 (const char *prefix, const char *b, const char *e)
 682 {
 683   char *ln;
 684   if (b < e && e[-1] == '\n')
 685     --e;
 686   if (b < e && e[-1] == '\r')
 687     --e;
 688   BOUNDED_TO_ALLOCA (b, e, ln);
 689   logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln);
 690 }
 691
 692 /* Print the server response, line by line, omitting the trailing CR
 693    characters, prefixed with PREFIX.  */
 694
 695 static void
 696 print_server_response (const struct response *resp, const char *prefix)
 697 {
 698   int i;
 699   if (!resp->headers)
 700     return;
 701   for (i = 0; resp->headers[i + 1]; i++)
 702     print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]);
 703 }
 704
 705 /* Parse the `Content-Range' header and extract the information it
 706    contains.  Returns 1 if successful, -1 otherwise.  */
 707 static int
 708 parse_content_range (const char *hdr, long *first_byte_ptr,
 709                      long *last_byte_ptr, long *entity_length_ptr)
 710 {
 711   long num;
 712
 713   /* Ancient versions of Netscape proxy server, presumably predating
 714      rfc2068, sent out `Content-Range' without the "bytes"
 715      specifier.  */
 716   if (!strncasecmp (hdr, "bytes", 5))
 717     {
 718       hdr += 5;
 719       /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
 720          HTTP spec. */
 721       if (*hdr == ':')
 722         ++hdr;
 723       while (ISSPACE (*hdr))
 724         ++hdr;
 725       if (!*hdr)
 726         return 0;
 727     }
 728   if (!ISDIGIT (*hdr))
 729     return 0;
 730   for (num = 0; ISDIGIT (*hdr); hdr++)
 731     num = 10 * num + (*hdr - '0');
 732   if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
 733     return 0;
 734   *first_byte_ptr = num;
 735   ++hdr;
 736   for (num = 0; ISDIGIT (*hdr); hdr++)
 737     num = 10 * num + (*hdr - '0');
 738   if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
 739     return 0;
 740   *last_byte_ptr = num;
 741   ++hdr;
 742   for (num = 0; ISDIGIT (*hdr); hdr++)
 743     num = 10 * num + (*hdr - '0');
 744   *entity_length_ptr = num;
 745   return 1;
 746 }
 747
 748 /* Read the body of the request, but don't store it anywhere and don't
 749    display a progress gauge.  This is useful for reading the error
 750    responses whose bodies don't need to be displayed or logged, but
 751    which need to be read anyway.  */
 752
 753 static void
 754 skip_short_body (int fd, long contlen)
 755 {
 756   /* Skipping the body doesn't make sense if the content length is
 757      unknown because, in that case, persistent connections cannot be
 758      used.  (#### This is not the case with HTTP/1.1 where they can
 759      still be used with the magic of the "chunked" transfer!)  */
 760   if (contlen == -1)
 761     return;
 762   DEBUGP (("Skipping %ld bytes of body data... ", contlen));
 763
 764   while (contlen > 0)
 765     {
 766       char dlbuf[512];
 767       int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1);
 768       if (ret <= 0)
 769         return;
 770       contlen -= ret;
 771     }
 772   DEBUGP (("done.\n"));
 773 }
 774 \f
/* Persistent connections.  Currently, we cache the most recently used
   connection as persistent, provided that the HTTP server agrees to
   make it such.  The persistence data is stored in the variables
   below.  Ideally, it should be possible to cache an arbitrary fixed
   number of these connections.  */

/* Whether a persistent connection is active.  The contents of pconn
   are meaningful only while this is non-zero.  */
static int pconn_active;

static struct {
  /* The socket of the connection.  */
  int socket;

  /* Host and port of the currently active persistent connection.
     HOST is a heap copy made by register_persistent and freed by
     invalidate_persistent.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  int ssl;
} pconn;
 795
 796 /* Mark the persistent connection as invalid and free the resources it
 797    uses.  This is used by the CLOSE_* macros after they forcefully
 798    close a registered persistent connection.  */
 799
 800 static void
 801 invalidate_persistent (void)
 802 {
 803   DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
 804   pconn_active = 0;
 805   fd_close (pconn.socket);
 806   xfree (pconn.host);
 807   xzero (pconn);
 808 }
 809
 810 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
 811    persistent.  This will enable someone to use the same connection
 812    later.  In the context of HTTP, this must be called only AFTER the
 813    response has been received and the server has promised that the
 814    connection will remain alive.
 815
 816    If a previous connection was persistent, it is closed. */
 817
 818 static void
 819 register_persistent (const char *host, int port, int fd, int ssl)
 820 {
 821   if (pconn_active)
 822     {
 823       if (pconn.socket == fd)
 824         {
 825           /* The connection FD is already registered. */
 826           return;
 827         }
 828       else
 829         {
 830           /* The old persistent connection is still active; close it
 831              first.  This situation arises whenever a persistent
 832              connection exists, but we then connect to a different
 833              host, and try to register a persistent connection to that
 834              one.  */
 835           invalidate_persistent ();
 836         }
 837     }
 838
 839   pconn_active = 1;
 840   pconn.socket = fd;
 841   pconn.host = xstrdup (host);
 842   pconn.port = port;
 843   pconn.ssl = ssl;
 844
 845   DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
 846 }
 847
 848 /* Return non-zero if a persistent connection is available for
 849    connecting to HOST:PORT.  */
 850
 851 static int
 852 persistent_available_p (const char *host, int port, int ssl,
 853                         int *host_lookup_failed)
 854 {
 855   /* First, check whether a persistent connection is active at all.  */
 856   if (!pconn_active)
 857     return 0;
 858
 859   /* If we want SSL and the last connection wasn't or vice versa,
 860      don't use it.  Checking for host and port is not enough because
 861      HTTP and HTTPS can apparently coexist on the same port.  */
 862   if (ssl != pconn.ssl)
 863     return 0;
 864
 865   /* If we're not connecting to the same port, we're not interested. */
 866   if (port != pconn.port)
 867     return 0;
 868
 869   /* If the host is the same, we're in business.  If not, there is
 870      still hope -- read below.  */
 871   if (0 != strcasecmp (host, pconn.host))
 872     {
 873       /* If pconn.socket is already talking to HOST, we needn't
 874          reconnect.  This happens often when both sites are virtual
 875          hosts distinguished only by name and served by the same
 876          network interface, and hence the same web server (possibly
 877          set up by the ISP and serving many different web sites).
 878          This admittedly non-standard optimization does not contradict
 879          HTTP and works well with popular server software.  */
 880
 881       int found;
 882       ip_address ip;
 883       struct address_list *al;
 884
 885       if (ssl)
 886         /* Don't try to talk to two different SSL sites over the same
 887            secure connection!  (Besides, it's not clear if name-based
 888            virtual hosting is even possible with SSL.)  */
 889         return 0;
 890
 891       /* If pconn.socket's peer is one of the IP addresses HOST
 892          resolves to, pconn.socket is for all intents and purposes
 893          already talking to HOST.  */
 894
 895       if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
 896         {
 897           /* Can't get the peer's address -- something must be very
 898              wrong with the connection.  */
 899           invalidate_persistent ();
 900           return 0;
 901         }
 902       al = lookup_host (host, 0);
 903       if (!al)
 904         {
 905           *host_lookup_failed = 1;
 906           return 0;
 907         }
 908
 909       found = address_list_contains (al, &ip);
 910       address_list_release (al);
 911
 912       if (!found)
 913         return 0;
 914
 915       /* The persistent connection's peer address was found among the
 916          addresses HOST resolved to; therefore, pconn.sock is in fact
 917          already talking to HOST -- no need to reconnect.  */
 918     }
 919
 920   /* Finally, check whether the connection is still open.  This is
 921      important because most server implement a liberal (short) timeout
 922      on persistent connections.  Wget can of course always reconnect
 923      if the connection doesn't work out, but it's nicer to know in
 924      advance.  This test is a logical followup of the first test, but
 925      is "expensive" and therefore placed at the end of the list.  */
 926
 927   if (!test_socket_open (pconn.socket))
 928     {
 929       /* Oops, the socket is no longer open.  Now that we know that,
 930          let's invalidate the persistent connection before returning
 931          0.  */
 932       invalidate_persistent ();
 933       return 0;
 934     }
 935
 936   return 1;
 937 }
 938
/* The idea behind these two CLOSE macros is to distinguish between
   two cases: one when the job we've been doing is finished, and we
   want to close the connection and leave, and two when something is
   seriously wrong and we're closing the connection as part of
   cleanup.

   In case of keep_alive, CLOSE_FINISH should leave the connection
   open, while CLOSE_INVALIDATE should still close it.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".

   Both macros take FD as an lvalue.  CLOSE_FINISH also reads a
   variable named `keep_alive', which must be in scope where the macro
   is used.  CLOSE_INVALIDATE always sets FD to -1; CLOSE_FINISH sets
   it to -1 only when it closes FD directly, not when it goes through
   invalidate_persistent.  */

#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      if (pconn_active && (fd) == pconn.socket) \
        invalidate_persistent ();               \
      else                                      \
        {                                       \
          fd_close (fd);                        \
          fd = -1;                              \
        }                                       \
    }                                           \
} while (0)

#define CLOSE_INVALIDATE(fd) do {               \
  if (pconn_active && (fd) == pconn.socket)     \
    invalidate_persistent ();                   \
  else                                          \
    fd_close (fd);                              \
  fd = -1;                                      \
} while (0)
 974 \f
 975 struct http_stat
 976 {
 977   long len;                     /* received length */
 978   long contlen;                 /* expected length */
 979   long restval;                 /* the restart value */
 980   int res;                      /* the result of last read */
 981   char *newloc;                 /* new location (redirection) */
 982   char *remote_time;            /* remote time-stamp string */
 983   char *error;                  /* textual HTTP error */
 984   int statcode;                 /* status code */
 985   long rd_size;                 /* amount of data read from socket */
 986   double dltime;                /* time it took to download the data */
 987   const char *referer;          /* value of the referer header. */
 988   char **local_file;            /* local file. */
 989 };
 990
 991 static void
 992 free_hstat (struct http_stat *hs)
 993 {
 994   xfree_null (hs->newloc);
 995   xfree_null (hs->remote_time);
 996   xfree_null (hs->error);
 997
 998   /* Guard against being called twice. */
 999   hs->newloc = NULL;
1000   hs->remote_time = NULL;
1001   hs->error = NULL;
1002 }
1003
1004 static char *create_authorization_line PARAMS ((const char *, const char *,
1005                                                 const char *, const char *,
1006                                                 const char *));
1007 static char *basic_authentication_encode PARAMS ((const char *, const char *));
1008 static int known_authentication_scheme_p PARAMS ((const char *));
1009
1010 time_t http_atotm PARAMS ((const char *));
1011
/* Non-zero if LINE starts with STRING_CONSTANT, compared without
   regard to case, and the match ends at a word boundary, i.e. the
   character that follows it in LINE is whitespace or the terminating
   NUL.

   STRING_CONSTANT must be a string literal (or a char array),
   because its length is taken with sizeof.  LINE is fully
   parenthesized in the expansion so that expressions such as
   `p + 2' work, but it is evaluated more than once and therefore
   must not have side effects.  */
#define BEGINS_WITH(line, string_constant)                                  \
  (!strncasecmp ((line), (string_constant), sizeof (string_constant) - 1)  \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                      \
       || !(line)[sizeof (string_constant) - 1]))
1016
1017 /* Retrieve a document through HTTP protocol.  It recognizes status
1018    code, and correctly handles redirections.  It closes the network
1019    socket.  If it receives an error from the functions below it, it
1020    will print it if there is enough information to do so (almost
1021    always), returning the error to the caller (i.e. http_loop).
1022
1023    Various HTTP parameters are stored to hs.
1024
1025    If PROXY is non-NULL, the connection will be made to the proxy
1026    server, and u->url will be requested.  */
1027 static uerr_t
1028 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1029 {
1030   struct request *req;
1031
1032   char *type;
1033   char *user, *passwd;
1034   char *proxyauth;
1035   int statcode;
1036   int write_error;
1037   long contlen, contrange;
1038   struct url *conn;
1039   FILE *fp;
1040
1041   int sock = -1;
1042   int flags;
1043
1044   /* Whether authorization has been already tried. */
1045   int auth_tried_already = 0;
1046
1047   /* Whether our connection to the remote host is through SSL.  */
1048   int using_ssl = 0;
1049
1050   char *head;
1051   struct response *resp;
1052   char hdrval[256];
1053   char *message;
1054
1055   /* Whether this connection will be kept alive after the HTTP request
1056      is done. */
1057   int keep_alive;
1058
1059   /* Whether keep-alive should be inhibited. */
1060   int inhibit_keep_alive = !opt.http_keep_alive;
1061
1062   /* Headers sent when using POST. */
1063   long post_data_size = 0;
1064
1065   int host_lookup_failed = 0;
1066
1067 #ifdef HAVE_SSL
1068   if (u->scheme == SCHEME_HTTPS)
1069     {
1070       /* Initialize the SSL context.  After this has once been done,
1071          it becomes a no-op.  */
1072       switch (ssl_init ())
1073         {
1074         case SSLERRCTXCREATE:
1075           /* this is fatal */
1076           logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
1077           return SSLERRCTXCREATE;
1078         case SSLERRCERTFILE:
1079           /* try without certfile */
1080           logprintf (LOG_NOTQUIET,
1081                      _("Failed to load certificates from %s\n"),
1082                      opt.sslcertfile);
1083           logprintf (LOG_NOTQUIET,
1084                      _("Trying without the specified certificate\n"));
1085           break;
1086         case SSLERRCERTKEY:
1087           logprintf (LOG_NOTQUIET,
1088                      _("Failed to get certificate key from %s\n"),
1089                      opt.sslcertkey);
1090           logprintf (LOG_NOTQUIET,
1091                      _("Trying without the specified certificate\n"));
1092           break;
1093         default:
1094           break;
1095         }
1096     }
1097 #endif /* HAVE_SSL */
1098
1099   if (!(*dt & HEAD_ONLY))
1100     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1101        know the local filename so we can save to it. */
1102     assert (*hs->local_file != NULL);
1103
1104   auth_tried_already = 0;
1105
1106   /* Initialize certain elements of struct http_stat.  */
1107   hs->len = 0L;
1108   hs->contlen = -1;
1109   hs->res = -1;
1110   hs->newloc = NULL;
1111   hs->remote_time = NULL;
1112   hs->error = NULL;
1113
1114   conn = u;
1115
1116   proxyauth = NULL;
1117   if (proxy)
1118     {
1119       char *proxy_user, *proxy_passwd;
1120       /* For normal username and password, URL components override
1121          command-line/wgetrc parameters.  With proxy
1122          authentication, it's the reverse, because proxy URLs are
1123          normally the "permanent" ones, so command-line args
1124          should take precedence.  */
1125       if (opt.proxy_user && opt.proxy_passwd)
1126         {
1127           proxy_user = opt.proxy_user;
1128           proxy_passwd = opt.proxy_passwd;
1129         }
1130       else
1131         {
1132           proxy_user = proxy->user;
1133           proxy_passwd = proxy->passwd;
1134         }
1135       /* #### This does not appear right.  Can't the proxy request,
1136          say, `Digest' authentication?  */
1137       if (proxy_user && proxy_passwd)
1138         proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1139
1140       /* If we're using a proxy, we will be connecting to the proxy
1141          server.  */
1142       conn = proxy;
1143     }
1144
1145   /* Prepare the request to send. */
1146
1147   req = request_new ();
1148   {
1149     const char *meth = "GET";
1150     if (*dt & HEAD_ONLY)
1151       meth = "HEAD";
1152     else if (opt.post_file_name || opt.post_data)
1153       meth = "POST";
1154     /* Use the full path, i.e. one that includes the leading slash and
1155        the query string.  E.g. if u->path is "foo/bar" and u->query is
1156        "param=value", full_path will be "/foo/bar?param=value".  */
1157     request_set_method (req, meth,
1158                         proxy ? xstrdup (u->url) : url_full_path (u));
1159   }
1160
1161   request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1162   if (*dt & SEND_NOCACHE)
1163     request_set_header (req, "Pragma", "no-cache", rel_none);
1164   if (hs->restval)
1165     request_set_header (req, "Range",
1166                         aprintf ("bytes=%ld-", hs->restval), rel_value);
1167   if (opt.useragent)
1168     request_set_header (req, "User-Agent", opt.useragent, rel_none);
1169   else
1170     request_set_header (req, "User-Agent",
1171                         aprintf ("Wget/%s", version_string), rel_value);
1172   request_set_header (req, "Accept", "*/*", rel_none);
1173
1174   /* Find the username and password for authentication. */
1175   user = u->user;
1176   passwd = u->passwd;
1177   search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1178   user = user ? user : opt.http_user;
1179   passwd = passwd ? passwd : opt.http_passwd;
1180
1181   if (user && passwd)
1182     {
1183       /* We have the username and the password, but haven't tried
1184          any authorization yet.  Let's see if the "Basic" method
1185          works.  If not, we'll come back here and construct a
1186          proper authorization method with the right challenges.
1187
1188          If we didn't employ this kind of logic, every URL that
1189          requires authorization would have to be processed twice,
1190          which is very suboptimal and generates a bunch of false
1191          "unauthorized" errors in the server log.
1192
1193          #### But this logic also has a serious problem when used
1194          with stronger authentications: we *first* transmit the
1195          username and the password in clear text, and *then* attempt a
1196          stronger authentication scheme.  That cannot be right!  We
1197          are only fortunate that almost everyone still uses the
1198          `Basic' scheme anyway.
1199
1200          There should be an option to prevent this from happening, for
1201          those who use strong authentication schemes and value their
1202          passwords.  */
1203       request_set_header (req, "Authorization",
1204                           basic_authentication_encode (user, passwd),
1205                           rel_value);
1206     }
1207
1208   {
1209     /* Whether we need to print the host header with braces around
1210        host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1211        usual "Host: symbolic-name:1234". */
1212     int squares = strchr (u->host, ':') != NULL;
1213     if (u->port == scheme_default_port (u->scheme))
1214       request_set_header (req, "Host",
1215                           aprintf (squares ? "[%s]" : "%s", u->host),
1216                           rel_value);
1217     else
1218       request_set_header (req, "Host",
1219                           aprintf (squares ? "[%s]:%d" : "%s:%d",
1220                                    u->host, u->port),
1221                           rel_value);
1222   }
1223
1224   if (!inhibit_keep_alive)
1225     request_set_header (req, "Connection", "Keep-Alive", rel_none);
1226
1227   if (opt.cookies)
1228     request_set_header (req, "Cookie",
1229                         cookie_header (wget_cookie_jar,
1230                                        u->host, u->port, u->path,
1231 #ifdef HAVE_SSL
1232                                        u->scheme == SCHEME_HTTPS
1233 #else
1234                                        0
1235 #endif
1236                                        ),
1237                         rel_value);
1238
1239   if (opt.post_data || opt.post_file_name)
1240     {
1241       request_set_header (req, "Content-Type",
1242                           "application/x-www-form-urlencoded", rel_none);
1243       if (opt.post_data)
1244         post_data_size = strlen (opt.post_data);
1245       else
1246         {
1247           post_data_size = file_size (opt.post_file_name);
1248           if (post_data_size == -1)
1249             {
1250               logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
1251                          opt.post_file_name);
1252               post_data_size = 0;
1253             }
1254         }
1255       request_set_header (req, "Content-Length",
1256                           aprintf ("Content-Length: %ld", post_data_size),
1257                           rel_value);
1258     }
1259
1260   /* Add the user headers. */
1261   if (opt.user_headers)
1262     {
1263       int i;
1264       for (i = 0; opt.user_headers[i]; i++)
1265         request_set_user_header (req, opt.user_headers[i]);
1266     }
1267
1268  retry_with_auth:
1269   /* We need to come back here when the initial attempt to retrieve
1270      without authorization header fails.  (Expected to happen at least
1271      for the Digest authorization scheme.)  */
1272
1273   keep_alive = 0;
1274
1275   /* Establish the connection.  */
1276
1277   if (!inhibit_keep_alive)
1278     {
1279       /* Look for a persistent connection to target host, unless a
1280          proxy is used.  The exception is when SSL is in use, in which
1281          case the proxy is nothing but a passthrough to the target
1282          host, registered as a connection to the latter.  */
1283       struct url *relevant = conn;
1284 #ifdef HAVE_SSL
1285       if (u->scheme == SCHEME_HTTPS)
1286         relevant = u;
1287 #endif
1288
1289       if (persistent_available_p (relevant->host, relevant->port,
1290 #ifdef HAVE_SSL
1291                                   relevant->scheme == SCHEME_HTTPS,
1292 #else
1293                                   0,
1294 #endif
1295                                   &host_lookup_failed))
1296         {
1297           sock = pconn.socket;
1298           using_ssl = pconn.ssl;
1299           logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1300                      pconn.host, pconn.port);
1301           DEBUGP (("Reusing fd %d.\n", sock));
1302         }
1303     }
1304
1305   if (sock < 0)
1306     {
1307       /* In its current implementation, persistent_available_p will
1308          look up conn->host in some cases.  If that lookup failed, we
1309          don't need to bother with connect_to_host.  */
1310       if (host_lookup_failed)
1311         return HOSTERR;
1312
1313       sock = connect_to_host (conn->host, conn->port);
1314       if (sock == E_HOST)
1315         return HOSTERR;
1316       else if (sock < 0)
1317         return (retryable_socket_connect_error (errno)
1318                 ? CONERROR : CONIMPOSSIBLE);
1319
1320 #ifdef HAVE_SSL
1321       if (proxy && u->scheme == SCHEME_HTTPS)
1322         {
1323           /* When requesting SSL URLs through proxies, use the
1324              CONNECT method to request passthrough.  */
1325           struct request *connreq = request_new ();
1326           request_set_method (connreq, "CONNECT",
1327                               aprintf ("%s:%d", u->host, u->port));
1328           if (proxyauth)
1329             {
1330               request_set_header (connreq, "Proxy-Authorization",
1331                                   proxyauth, rel_value);
1332               /* Now that PROXYAUTH is part of the CONNECT request,
1333                  zero it out so we don't send proxy authorization with
1334                  the regular request below.  */
1335               proxyauth = NULL;
1336             }
1337
1338           write_error = request_send (connreq, sock);
1339           request_free (connreq);
1340           if (write_error < 0)
1341             {
1342               logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1343                          strerror (errno));
1344               CLOSE_INVALIDATE (sock);
1345               return WRITEFAILED;
1346             }
1347
1348           head = fd_read_http_head (sock);
1349           if (!head)
1350             {
1351               logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1352                          strerror (errno));
1353               CLOSE_INVALIDATE (sock);
1354               return HERR;
1355             }
1356           message = NULL;
1357           if (!*head)
1358             {
1359               xfree (head);
1360               goto failed_tunnel;
1361             }
1362           DEBUGP (("proxy responded with: [%s]\n", head));
1363
1364           resp = response_new (head);
1365           statcode = response_status (resp, &message);
1366           response_free (resp);
1367           if (statcode != 200)
1368             {
1369             failed_tunnel:
1370               logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1371                          message ? message : "?");
1372               xfree_null (message);
1373               return CONSSLERR;
1374             }
1375           xfree (message);
1376
1377           /* SOCK is now *really* connected to u->host, so update CONN
1378              to reflect this.  That way register_persistent will
1379              register SOCK as being connected to u->host:u->port.  */
1380           conn = u;
1381         }
1382
1383       if (conn->scheme == SCHEME_HTTPS)
1384         {
1385           if (!ssl_connect (sock))
1386             {
1387               fd_close (sock);
1388               return CONSSLERR;
1389             }
1390           using_ssl = 1;
1391         }
1392 #endif /* HAVE_SSL */
1393     }
1394
1395   /* Send the request to server.  */
1396   write_error = request_send (req, sock);
1397
1398   if (write_error >= 0)
1399     {
1400       if (opt.post_data)
1401         {
1402           DEBUGP (("[POST data: %s]\n", opt.post_data));
1403           write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1404         }
1405       else if (opt.post_file_name && post_data_size != 0)
1406         write_error = post_file (sock, opt.post_file_name, post_data_size);
1407     }
1408
1409   if (write_error < 0)
1410     {
1411       logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1412                  strerror (errno));
1413       CLOSE_INVALIDATE (sock);
1414       request_free (req);
1415       return WRITEFAILED;
1416     }
1417   logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1418              proxy ? "Proxy" : "HTTP");
1419   contlen = -1;
1420   contrange = 0;
1421   type = NULL;
1422   statcode = -1;
1423   *dt &= ~RETROKF;
1424
1425   head = fd_read_http_head (sock);
1426   if (!head)
1427     {
1428       if (errno == 0)
1429         {
1430           logputs (LOG_NOTQUIET, _("No data received.\n"));
1431           CLOSE_INVALIDATE (sock);
1432           request_free (req);
1433           return HEOF;
1434         }
1435       else
1436         {
1437           logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1438                      strerror (errno));
1439           CLOSE_INVALIDATE (sock);
1440           request_free (req);
1441           return HERR;
1442         }
1443     }
1444   DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1445
1446   resp = response_new (head);
1447
1448   /* Check for status line.  */
1449   message = NULL;
1450   statcode = response_status (resp, &message);
1451   if (!opt.server_response)
1452     logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
1453   else
1454     {
1455       logprintf (LOG_VERBOSE, "\n");
1456       print_server_response (resp, "  ");
1457     }
1458
1459   if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1460     contlen = strtol (hdrval, NULL, 10);
1461
1462   /* Check for keep-alive related responses. */
1463   if (!inhibit_keep_alive && contlen != -1)
1464     {
1465       if (response_header_copy (resp, "Keep-Alive", NULL, 0))
1466         keep_alive = 1;
1467       else if (response_header_copy (resp, "Connection", hdrval,
1468                                      sizeof (hdrval)))
1469         {
1470           if (0 == strcasecmp (hdrval, "Keep-Alive"))
1471             keep_alive = 1;
1472         }
1473     }
1474   if (keep_alive)
1475     /* The server has promised that it will not close the connection
1476        when we're done.  This means that we can register it.  */
1477     register_persistent (conn->host, conn->port, sock, using_ssl);
1478
1479   if (statcode == HTTP_STATUS_UNAUTHORIZED)
1480     {
1481       /* Authorization is required.  */
1482       skip_short_body (sock, contlen);
1483       CLOSE_FINISH (sock);
1484       if (auth_tried_already || !(user && passwd))
1485         {
1486           /* If we have tried it already, then there is not point
1487              retrying it.  */
1488           logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1489         }
1490       else
1491         {
1492           char *www_authenticate = response_header_strdup (resp,
1493                                                            "WWW-Authenticate");
1494           /* If the authentication scheme is unknown or if it's the
1495              "Basic" authentication (which we try by default), there's
1496              no sense in retrying.  */
1497           if (!www_authenticate
1498               || !known_authentication_scheme_p (www_authenticate)
1499               || BEGINS_WITH (www_authenticate, "Basic"))
1500             {
1501               xfree_null (www_authenticate);
1502               logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1503             }
1504           else
1505             {
1506               char *pth;
1507               auth_tried_already = 1;
1508               pth = url_full_path (u);
1509               request_set_header (req, "Authorization",
1510                                   create_authorization_line (www_authenticate,
1511                                                              user, passwd,
1512                                                              request_method (req),
1513                                                              pth),
1514                                   rel_value);
1515               xfree (pth);
1516               xfree (www_authenticate);
1517               goto retry_with_auth;
1518             }
1519         }
1520       request_free (req);
1521       return AUTHFAILED;
1522     }
1523   request_free (req);
1524
1525   hs->statcode = statcode;
1526   if (statcode == -1)
1527     hs->error = xstrdup (_("Malformed status line"));
1528   else if (!*message)
1529     hs->error = xstrdup (_("(no description)"));
1530   else
1531     hs->error = xstrdup (message);
1532
1533   type = response_header_strdup (resp, "Content-Type");
1534   if (type)
1535     {
1536       char *tmp = strchr (type, ';');
1537       if (tmp)
1538         {
1539           while (tmp > type && ISSPACE (tmp[-1]))
1540             --tmp;
1541           *tmp = '\0';
1542         }
1543     }
1544   hs->newloc = response_header_strdup (resp, "Location");
1545   hs->remote_time = response_header_strdup (resp, "Last-Modified");
1546   {
1547     char *set_cookie = response_header_strdup (resp, "Set-Cookie");
1548     if (set_cookie)
1549       {
1550         /* The jar should have been created by now. */
1551         assert (wget_cookie_jar != NULL);
1552         cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
1553                                   set_cookie);
1554         xfree (set_cookie);
1555       }
1556   }
1557   if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1558     {
1559       long first_byte_pos, last_byte_pos, entity_length;
1560       if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1561                                &entity_length))
1562         contrange = first_byte_pos;
1563     }
1564   response_free (resp);
1565
1566   /* 20x responses are counted among successful by default.  */
1567   if (H_20X (statcode))
1568     *dt |= RETROKF;
1569
1570   /* Return if redirected.  */
1571   if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1572     {
1573       /* RFC2068 says that in case of the 300 (multiple choices)
1574          response, the server can output a preferred URL through
1575          `Location' header; otherwise, the request should be treated
1576          like GET.  So, if the location is set, it will be a
1577          redirection; otherwise, just proceed normally.  */
1578       if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1579         *dt |= RETROKF;
1580       else
1581         {
1582           logprintf (LOG_VERBOSE,
1583                      _("Location: %s%s\n"),
1584                      hs->newloc ? hs->newloc : _("unspecified"),
1585                      hs->newloc ? _(" [following]") : "");
1586           if (keep_alive)
1587             skip_short_body (sock, contlen);
1588           CLOSE_FINISH (sock);
1589           xfree_null (type);
1590           return NEWLOCATION;
1591         }
1592     }
1593
1594   /* If content-type is not given, assume text/html.  This is because
1595      of the multitude of broken CGI's that "forget" to generate the
1596      content-type.  */
1597   if (!type ||
1598         0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1599         0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1600     *dt |= TEXTHTML;
1601   else
1602     *dt &= ~TEXTHTML;
1603
1604   if (opt.html_extension && (*dt & TEXTHTML))
1605     /* -E / --html-extension / html_extension = on was specified, and this is a
1606        text/html file.  If some case-insensitive variation on ".htm[l]" isn't
1607        already the file's suffix, tack on ".html". */
1608     {
1609       char*  last_period_in_local_filename = strrchr(*hs->local_file, '.');
1610
1611       if (last_period_in_local_filename == NULL
1612           || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1613                || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1614         {
1615           size_t  local_filename_len = strlen(*hs->local_file);
1616
1617           *hs->local_file = xrealloc(*hs->local_file,
1618                                      local_filename_len + sizeof(".html"));
1619           strcpy(*hs->local_file + local_filename_len, ".html");
1620
1621           *dt |= ADDED_HTML_EXTENSION;
1622         }
1623     }
1624
1625   if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
1626     {
1627       /* If `-c' is in use and the file has been fully downloaded (or
1628          the remote file has shrunk), Wget effectively requests bytes
1629          after the end of file and the server response with 416.  */
1630       logputs (LOG_VERBOSE, _("\
1631 \n    The file is already fully retrieved; nothing to do.\n\n"));
1632       /* In case the caller inspects. */
1633       hs->len = contlen;
1634       hs->res = 0;
1635       /* Mark as successfully retrieved. */
1636       *dt |= RETROKF;
1637       xfree_null (type);
1638       CLOSE_INVALIDATE (sock);  /* would be CLOSE_FINISH, but there
1639                                    might be more bytes in the body. */
1640       return RETRUNNEEDED;
1641     }
1642   if ((contrange != 0 && contrange != hs->restval)
1643       || (H_PARTIAL (statcode) && !contrange))
1644     {
1645       /* The Range request was somehow misunderstood by the server.
1646          Bail out.  */
1647       xfree_null (type);
1648       CLOSE_INVALIDATE (sock);
1649       return RANGEERR;
1650     }
1651   hs->contlen = contlen + contrange;
1652
1653   if (opt.verbose)
1654     {
1655       if (*dt & RETROKF)
1656         {
1657           /* No need to print this output if the body won't be
1658              downloaded at all, or if the original server response is
1659              printed.  */
1660           logputs (LOG_VERBOSE, _("Length: "));
1661           if (contlen != -1)
1662             {
1663               logputs (LOG_VERBOSE, legible (contlen + contrange));
1664               if (contrange)
1665                 logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen));
1666             }
1667           else
1668             logputs (LOG_VERBOSE,
1669                      opt.ignore_length ? _("ignored") : _("unspecified"));
1670           if (type)
1671             logprintf (LOG_VERBOSE, " [%s]\n", type);
1672           else
1673             logputs (LOG_VERBOSE, "\n");
1674         }
1675     }
1676   xfree_null (type);
1677   type = NULL;                  /* We don't need it any more.  */
1678
1679   /* Return if we have no intention of further downloading.  */
1680   if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1681     {
1682       /* In case the caller cares to look...  */
1683       hs->len = 0L;
1684       hs->res = 0;
1685       xfree_null (type);
1686       /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
1687          servers not to send body in response to a HEAD request.  If
1688          you encounter such a server (more likely a broken CGI), use
1689          `--no-http-keep-alive'.  */
1690       CLOSE_FINISH (sock);
1691       return RETRFINISHED;
1692     }
1693
1694   /* Open the local file.  */
1695   if (!output_stream)
1696     {
1697       mkalldirs (*hs->local_file);
1698       if (opt.backups)
1699         rotate_backups (*hs->local_file);
1700       fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
1701       if (!fp)
1702         {
1703           logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1704           CLOSE_INVALIDATE (sock);
1705           return FOPENERR;
1706         }
1707     }
1708   else
1709     fp = output_stream;
1710
1711   /* #### This confuses the timestamping code that checks for file
1712      size.  Maybe we should save some additional information?  */
1713   if (opt.save_headers)
1714     fwrite (head, 1, strlen (head), fp);
1715
1716   /* Download the request body.  */
1717   flags = 0;
1718   if (keep_alive)
1719     flags |= rb_read_exactly;
1720   if (hs->restval > 0 && contrange == 0)
1721     /* If the server ignored our range request, instruct fd_read_body
1722        to skip the first RESTVAL bytes of body.  */
1723     flags |= rb_skip_startpos;
1724   hs->len = hs->restval;
1725   hs->rd_size = 0;
1726   hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
1727                           hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
1728                           flags);
1729
1730   if (hs->res >= 0)
1731     CLOSE_FINISH (sock);
1732   else
1733     CLOSE_INVALIDATE (sock);
1734
1735   {
1736     /* Close or flush the file.  We have to be careful to check for
1737        error here.  Checking the result of fwrite() is not enough --
1738        errors could go unnoticed!  */
1739     int flush_res;
1740     if (!output_stream)
1741       flush_res = fclose (fp);
1742     else
1743       flush_res = fflush (fp);
1744     if (flush_res == EOF)
1745       hs->res = -2;
1746   }
1747   if (hs->res == -2)
1748     return FWRITEERR;
1749   return RETRFINISHED;
1750 }
1751
1752 /* The genuine HTTP loop!  This is the part where the retrieval is
1753    retried, and retried, and retried, and...  */
1754 uerr_t
1755 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1756            int *dt, struct url *proxy)
1757 {
1758   int count;
1759   int use_ts, got_head = 0;     /* time-stamping info */
1760   char *filename_plus_orig_suffix;
1761   char *local_filename = NULL;
1762   char *tms, *locf, *tmrate;
1763   uerr_t err;
1764   time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
1765   long local_size = 0;          /* the size of the local file */
1766   size_t filename_len;
1767   struct http_stat hstat;       /* HTTP status */
1768   struct stat st;
1769   char *dummy = NULL;
1770
1771   /* This used to be done in main(), but it's a better idea to do it
1772      here so that we don't go through the hoops if we're just using
1773      FTP or whatever. */
1774   if (opt.cookies)
1775     {
1776       if (!wget_cookie_jar)
1777         wget_cookie_jar = cookie_jar_new ();
1778       if (opt.cookies_input && !cookies_loaded_p)
1779         {
1780           cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1781           cookies_loaded_p = 1;
1782         }
1783     }
1784
1785   *newloc = NULL;
1786
1787   /* Warn on (likely bogus) wildcard usage in HTTP.  Don't use
1788      has_wildcards_p because it would also warn on `?', and we know that
1789      shows up in CGI paths a *lot*.  */
1790   if (strchr (u->url, '*'))
1791     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1792
1793   xzero (hstat);
1794
1795   /* Determine the local filename.  */
1796   if (local_file && *local_file)
1797     hstat.local_file = local_file;
1798   else if (local_file)
1799     {
1800       *local_file = url_file_name (u);
1801       hstat.local_file = local_file;
1802     }
1803   else
1804     {
1805       dummy = url_file_name (u);
1806       hstat.local_file = &dummy;
1807     }
1808
1809   if (!opt.output_document)
1810     locf = *hstat.local_file;
1811   else
1812     locf = opt.output_document;
1813
1814   hstat.referer = referer;
1815
1816   filename_len = strlen (*hstat.local_file);
1817   filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1818
1819   if (opt.noclobber && file_exists_p (*hstat.local_file))
1820     {
1821       /* If opt.noclobber is turned on and file already exists, do not
1822          retrieve the file */
1823       logprintf (LOG_VERBOSE, _("\
1824 File `%s' already there, will not retrieve.\n"), *hstat.local_file);
1825       /* If the file is there, we suppose it's retrieved OK.  */
1826       *dt |= RETROKF;
1827
1828       /* #### Bogusness alert.  */
1829       /* If its suffix is "html" or "htm" or similar, assume text/html.  */
1830       if (has_html_suffix_p (*hstat.local_file))
1831         *dt |= TEXTHTML;
1832
1833       xfree_null (dummy);
1834       return RETROK;
1835     }
1836
1837   use_ts = 0;
1838   if (opt.timestamping)
1839     {
1840       int local_dot_orig_file_exists = 0;
1841
1842       if (opt.backup_converted)
1843         /* If -K is specified, we'll act on the assumption that it was specified
1844            last time these files were downloaded as well, and instead of just
1845            comparing local file X against server file X, we'll compare local
1846            file X.orig (if extant, else X) against server file X.  If -K
1847            _wasn't_ specified last time, or the server contains files called
1848            *.orig, -N will be back to not operating correctly with -k. */
1849         {
1850           /* Would a single s[n]printf() call be faster?  --dan
1851
1852              Definitely not.  sprintf() is horribly slow.  It's a
1853              different question whether the difference between the two
1854              affects a program.  Usually I'd say "no", but at one
1855              point I profiled Wget, and found that a measurable and
1856              non-negligible amount of time was lost calling sprintf()
1857              in url.c.  Replacing sprintf with inline calls to
1858              strcpy() and long_to_string() made a difference.
1859              --hniksic */
1860           memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
1861           memcpy (filename_plus_orig_suffix + filename_len,
1862                   ".orig", sizeof (".orig"));
1863
1864           /* Try to stat() the .orig file. */
1865           if (stat (filename_plus_orig_suffix, &st) == 0)
1866             {
1867               local_dot_orig_file_exists = 1;
1868               local_filename = filename_plus_orig_suffix;
1869             }
1870         }
1871
1872       if (!local_dot_orig_file_exists)
1873         /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1874         if (stat (*hstat.local_file, &st) == 0)
1875           local_filename = *hstat.local_file;
1876
1877       if (local_filename != NULL)
1878         /* There was a local file, so we'll check later to see if the version
1879            the server has is the same version we already have, allowing us to
1880            skip a download. */
1881         {
1882           use_ts = 1;
1883           tml = st.st_mtime;
1884 #ifdef WINDOWS
1885           /* Modification time granularity is 2 seconds for Windows, so
1886              increase local time by 1 second for later comparison. */
1887           tml++;
1888 #endif
1889           local_size = st.st_size;
1890           got_head = 0;
1891         }
1892     }
1893   /* Reset the counter.  */
1894   count = 0;
1895   *dt = 0;
1896   /* THE loop */
1897   do
1898     {
1899       /* Increment the pass counter.  */
1900       ++count;
1901       sleep_between_retrievals (count);
1902       /* Get the current time string.  */
1903       tms = time_str (NULL);
1904       /* Print fetch message, if opt.verbose.  */
1905       if (opt.verbose)
1906         {
1907           char *hurl = url_string (u, 1);
1908           char tmp[15];
1909           strcpy (tmp, "        ");
1910           if (count > 1)
1911             sprintf (tmp, _("(try:%2d)"), count);
1912           logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
1913                      tms, hurl, tmp, locf);
1914 #ifdef WINDOWS
1915           ws_changetitle (hurl, 1);
1916 #endif
1917           xfree (hurl);
1918         }
1919
1920       /* Default document type is empty.  However, if spider mode is
1921          on or time-stamping is employed, HEAD_ONLY commands is
1922          encoded within *dt.  */
1923       if (opt.spider || (use_ts && !got_head))
1924         *dt |= HEAD_ONLY;
1925       else
1926         *dt &= ~HEAD_ONLY;
1927
1928       /* Decide whether or not to restart.  */
1929       hstat.restval = 0;
1930       if (count > 1)
1931         hstat.restval = hstat.len; /* continue where we left off */
1932       else if (opt.always_rest
1933                && stat (locf, &st) == 0
1934                && S_ISREG (st.st_mode))
1935         hstat.restval = st.st_size;
1936
1937       /* Decide whether to send the no-cache directive.  We send it in
1938          two cases:
1939            a) we're using a proxy, and we're past our first retrieval.
1940               Some proxies are notorious for caching incomplete data, so
1941               we require a fresh get.
1942            b) caching is explicitly inhibited. */
1943       if ((proxy && count > 1)  /* a */
1944           || !opt.allow_cache   /* b */
1945           )
1946         *dt |= SEND_NOCACHE;
1947       else
1948         *dt &= ~SEND_NOCACHE;
1949
1950       /* Try fetching the document, or at least its head.  */
1951       err = gethttp (u, &hstat, dt, proxy);
1952
1953       /* It's unfortunate that wget determines the local filename before finding
1954          out the Content-Type of the file.  Barring a major restructuring of the
1955          code, we need to re-set locf here, since gethttp() may have xrealloc()d
1956          *hstat.local_file to tack on ".html". */
1957       if (!opt.output_document)
1958         locf = *hstat.local_file;
1959       else
1960         locf = opt.output_document;
1961
1962       /* Time?  */
1963       tms = time_str (NULL);
1964       /* Get the new location (with or without the redirection).  */
1965       if (hstat.newloc)
1966         *newloc = xstrdup (hstat.newloc);
1967       switch (err)
1968         {
1969         case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
1970         case CONERROR: case READERR: case WRITEFAILED:
1971         case RANGEERR:
1972           /* Non-fatal errors continue executing the loop, which will
1973              bring them to "while" statement at the end, to judge
1974              whether the number of tries was exceeded.  */
1975           free_hstat (&hstat);
1976           printwhat (count, opt.ntry);
1977           continue;
1978           break;
1979         case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
1980         case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
1981           /* Fatal errors just return from the function.  */
1982           free_hstat (&hstat);
1983           xfree_null (dummy);
1984           return err;
1985           break;
1986         case FWRITEERR: case FOPENERR:
1987           /* Another fatal error.  */
1988           logputs (LOG_VERBOSE, "\n");
1989           logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
1990                      *hstat.local_file, strerror (errno));
1991           free_hstat (&hstat);
1992           xfree_null (dummy);
1993           return err;
1994           break;
1995         case CONSSLERR:
1996           /* Another fatal error.  */
1997           logputs (LOG_VERBOSE, "\n");
1998           logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
1999           free_hstat (&hstat);
2000           xfree_null (dummy);
2001           return err;
2002           break;
2003         case NEWLOCATION:
2004           /* Return the new location to the caller.  */
2005           if (!hstat.newloc)
2006             {
2007               logprintf (LOG_NOTQUIET,
2008                          _("ERROR: Redirection (%d) without location.\n"),
2009                          hstat.statcode);
2010               free_hstat (&hstat);
2011               xfree_null (dummy);
2012               return WRONGCODE;
2013             }
2014           free_hstat (&hstat);
2015           xfree_null (dummy);
2016           return NEWLOCATION;
2017           break;
2018         case RETRUNNEEDED:
2019           /* The file was already fully retrieved. */
2020           free_hstat (&hstat);
2021           xfree_null (dummy);
2022           return RETROK;
2023           break;
2024         case RETRFINISHED:
2025           /* Deal with you later.  */
2026           break;
2027         default:
2028           /* All possibilities should have been exhausted.  */
2029           abort ();
2030         }
2031       if (!(*dt & RETROKF))
2032         {
2033           if (!opt.verbose)
2034             {
2035               /* #### Ugly ugly ugly! */
2036               char *hurl = url_string (u, 1);
2037               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2038               xfree (hurl);
2039             }
2040           logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2041                      tms, hstat.statcode, hstat.error);
2042           logputs (LOG_VERBOSE, "\n");
2043           free_hstat (&hstat);
2044           xfree_null (dummy);
2045           return WRONGCODE;
2046         }
2047
2048       /* Did we get the time-stamp?  */
2049       if (!got_head)
2050         {
2051           if (opt.timestamping && !hstat.remote_time)
2052             {
2053               logputs (LOG_NOTQUIET, _("\
2054 Last-modified header missing -- time-stamps turned off.\n"));
2055             }
2056           else if (hstat.remote_time)
2057             {
2058               /* Convert the date-string into struct tm.  */
2059               tmr = http_atotm (hstat.remote_time);
2060               if (tmr == (time_t) (-1))
2061                 logputs (LOG_VERBOSE, _("\
2062 Last-modified header invalid -- time-stamp ignored.\n"));
2063             }
2064         }
2065
2066       /* The time-stamping section.  */
2067       if (use_ts)
2068         {
2069           got_head = 1;
2070           *dt &= ~HEAD_ONLY;
2071           use_ts = 0;           /* no more time-stamping */
2072           count = 0;            /* the retrieve count for HEAD is
2073                                    reset */
2074           if (hstat.remote_time && tmr != (time_t) (-1))
2075             {
2076               /* Now time-stamping can be used validly.  Time-stamping
2077                  means that if the sizes of the local and remote file
2078                  match, and local file is newer than the remote file,
2079                  it will not be retrieved.  Otherwise, the normal
2080                  download procedure is resumed.  */
2081               if (tml >= tmr &&
2082                   (hstat.contlen == -1 || local_size == hstat.contlen))
2083                 {
2084                   logprintf (LOG_VERBOSE, _("\
2085 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2086                              local_filename);
2087                   free_hstat (&hstat);
2088                   xfree_null (dummy);
2089                   return RETROK;
2090                 }
2091               else if (tml >= tmr)
2092                 logprintf (LOG_VERBOSE, _("\
2093 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
2094               else
2095                 logputs (LOG_VERBOSE,
2096                          _("Remote file is newer, retrieving.\n"));
2097             }
2098           free_hstat (&hstat);
2099           continue;
2100         }
2101       if ((tmr != (time_t) (-1))
2102           && !opt.spider
2103           && ((hstat.len == hstat.contlen) ||
2104               ((hstat.res == 0) &&
2105                ((hstat.contlen == -1) ||
2106                 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2107         {
2108           /* #### This code repeats in http.c and ftp.c.  Move it to a
2109              function!  */
2110           const char *fl = NULL;
2111           if (opt.output_document)
2112             {
2113               if (output_stream_regular)
2114                 fl = opt.output_document;
2115             }
2116           else
2117             fl = *hstat.local_file;
2118           if (fl)
2119             touch (fl, tmr);
2120         }
2121       /* End of time-stamping section.  */
2122
2123       if (opt.spider)
2124         {
2125           logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
2126           xfree_null (dummy);
2127           return RETROK;
2128         }
2129
2130       tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
2131
2132       if (hstat.len == hstat.contlen)
2133         {
2134           if (*dt & RETROKF)
2135             {
2136               logprintf (LOG_VERBOSE,
2137                          _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
2138                          tms, tmrate, locf, hstat.len, hstat.contlen);
2139               logprintf (LOG_NONVERBOSE,
2140                          "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2141                          tms, u->url, hstat.len, hstat.contlen, locf, count);
2142             }
2143           ++opt.numurls;
2144           total_downloaded_bytes += hstat.len;
2145
2146           /* Remember that we downloaded the file for later ".orig" code. */
2147           if (*dt & ADDED_HTML_EXTENSION)
2148             downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2149           else
2150             downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2151
2152           free_hstat (&hstat);
2153           xfree_null (dummy);
2154           return RETROK;
2155         }
2156       else if (hstat.res == 0) /* No read error */
2157         {
2158           if (hstat.contlen == -1)  /* We don't know how much we were supposed
2159                                        to get, so assume we succeeded. */
2160             {
2161               if (*dt & RETROKF)
2162                 {
2163                   logprintf (LOG_VERBOSE,
2164                              _("%s (%s) - `%s' saved [%ld]\n\n"),
2165                              tms, tmrate, locf, hstat.len);
2166                   logprintf (LOG_NONVERBOSE,
2167                              "%s URL:%s [%ld] -> \"%s\" [%d]\n",
2168                              tms, u->url, hstat.len, locf, count);
2169                 }
2170               ++opt.numurls;
2171               total_downloaded_bytes += hstat.len;
2172
2173               /* Remember that we downloaded the file for later ".orig" code. */
2174               if (*dt & ADDED_HTML_EXTENSION)
2175                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2176               else
2177                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2178
2179               free_hstat (&hstat);
2180               xfree_null (dummy);
2181               return RETROK;
2182             }
2183           else if (hstat.len < hstat.contlen) /* meaning we lost the
2184                                                  connection too soon */
2185             {
2186               logprintf (LOG_VERBOSE,
2187                          _("%s (%s) - Connection closed at byte %ld. "),
2188                          tms, tmrate, hstat.len);
2189               printwhat (count, opt.ntry);
2190               free_hstat (&hstat);
2191               continue;
2192             }
2193           else if (!opt.kill_longer) /* meaning we got more than expected */
2194             {
2195               logprintf (LOG_VERBOSE,
2196                          _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
2197                          tms, tmrate, locf, hstat.len, hstat.contlen);
2198               logprintf (LOG_NONVERBOSE,
2199                          "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2200                          tms, u->url, hstat.len, hstat.contlen, locf, count);
2201               ++opt.numurls;
2202               total_downloaded_bytes += hstat.len;
2203
2204               /* Remember that we downloaded the file for later ".orig" code. */
2205               if (*dt & ADDED_HTML_EXTENSION)
2206                 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2207               else
2208                 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2209
2210               free_hstat (&hstat);
2211               xfree_null (dummy);
2212               return RETROK;
2213             }
2214           else                  /* the same, but not accepted */
2215             {
2216               logprintf (LOG_VERBOSE,
2217                          _("%s (%s) - Connection closed at byte %ld/%ld. "),
2218                          tms, tmrate, hstat.len, hstat.contlen);
2219               printwhat (count, opt.ntry);
2220               free_hstat (&hstat);
2221               continue;
2222             }
2223         }
2224       else                      /* now hstat.res can only be -1 */
2225         {
2226           if (hstat.contlen == -1)
2227             {
2228               logprintf (LOG_VERBOSE,
2229                          _("%s (%s) - Read error at byte %ld (%s)."),
2230                          tms, tmrate, hstat.len, strerror (errno));
2231               printwhat (count, opt.ntry);
2232               free_hstat (&hstat);
2233               continue;
2234             }
2235           else                  /* hstat.res == -1 and contlen is given */
2236             {
2237               logprintf (LOG_VERBOSE,
2238                          _("%s (%s) - Read error at byte %ld/%ld (%s). "),
2239                          tms, tmrate, hstat.len, hstat.contlen,
2240                          strerror (errno));
2241               printwhat (count, opt.ntry);
2242               free_hstat (&hstat);
2243               continue;
2244             }
2245         }
2246       /* not reached */
2247       break;
2248     }
2249   while (!opt.ntry || (count < opt.ntry));
2250   return TRYLIMEXC;
2251 }
2252 \f
/* Converts struct tm to time_t, assuming the data in tm is UTC rather
   than local timezone.

   mktime is similar but assumes struct tm, also known as the
   "broken-down" form of time, is in local time zone.  mktime_from_utc
   uses mktime to make the conversion understanding that an offset
   will be introduced by the local time assumption.

   mktime_from_utc then measures the introduced offset by applying
   gmtime to the initial result and applying mktime to the resulting
   "broken-down" form.  The difference between the two mktime results
   is the measured offset which is then subtracted from the initial
   mktime result to yield a calendar time which is the value returned.

   tm_isdst in struct tm is set to 0 to force mktime to introduce a
   consistent offset (the non DST offset) since tm and tm+o might be
   on opposite sides of a DST change.

   Some implementations of mktime return -1 for the nonexistent
   localtime hour at the beginning of DST.  In this event, use
   mktime(tm - 1hr) + 3600.

   Schematically
     mktime(tm)   --> t+o
     gmtime(t+o)  --> tm+o
     mktime(tm+o) --> t+2o
     (t+o) - ((t+2o) - (t+o)) = t

   T may be modified: mktime normalizes it, and its tm_hour is
   decremented when the first mktime call fails.  Returns -1 when the
   conversion is not possible.

   Note that glibc contains a function of the same purpose named
   `timegm' (reverse of gmtime).  But obviously, it is not universally
   available, and unfortunately it is not straightforwardly
   extractable for use here.  Perhaps configure should detect timegm
   and use it where available.

   Contributed by Roger Beeman <beeman@cisco.com>, with the help of
   Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
   Further improved by Roger with assistance from Edward J. Sabol
   based on input by Jamie Zawinski.  */

static time_t
mktime_from_utc (struct tm *t)
{
  time_t tl, tb;
  struct tm *tg;

  tl = mktime (t);
  if (tl == (time_t) -1)
    {
      t->tm_hour--;
      tl = mktime (t);
      if (tl == (time_t) -1)
        return -1; /* can't deal with output from strptime */
      tl += 3600;
    }
  tg = gmtime (&tl);
  if (tg == NULL)
    return -1; /* gmtime may fail; don't dereference a null result */
  tg->tm_isdst = 0;
  tb = mktime (tg);
  if (tb == (time_t) -1)
    {
      tg->tm_hour--;
      tb = mktime (tg);
      if (tb == (time_t) -1)
        return -1; /* can't deal with output from gmtime */
      tb += 3600;
    }
  return (tl - (tb - tl));
}
2320
2321 /* Check whether the result of strptime() indicates success.
2322    strptime() returns the pointer to how far it got to in the string.
2323    The processing has been successful if the string is at `GMT' or
2324    `+X', or at the end of the string.
2325
2326    In extended regexp parlance, the function returns 1 if P matches
2327    "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (which strptime
2328    can return) is considered a failure and 0 is returned.  */
2329 static int
2330 check_end (const char *p)
2331 {
2332   if (!p)
2333     return 0;
2334   while (ISSPACE (*p))
2335     ++p;
2336   if (!*p
2337       || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2338       || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2339     return 1;
2340   else
2341     return 0;
2342 }
2343
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2068 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as I would like it to be.  Being based on
   strptime, it always allows shortened months, one-digit days, etc.,
   but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   I've investigated free and PD code for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
   distributed with phttpd, is excellent, but we cannot use it because
   it is not assigned to the FSF.  So I stuck it with strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* RFC1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* RFC850:  Thursday, 29-Jan-98 22:12:57 */
    "%a, %d-%b-%Y %T",          /* pseudo-RFC850:  Thu, 29-Jan-1998 22:12:57
                                   (google.com uses this for their cookies.) */
    "%a %b %d %T %Y"            /* asctime: Thu Jan 29 22:12:57 1998 */
  };

  int i;

  /* Note that under foreign locales Solaris strptime() fails to
     recognize English dates, which renders this function useless.  We
     solve this by being careful not to affect LC_TIME when
     initializing locale.

     Another solution would be to temporarily set locale to C, invoke
     strptime(), and restore it back.  This is slow and dirty,
     however, and locale support other than LC_MESSAGES can mess other
     things, so I rather chose to stick with just setting LC_MESSAGES.

     GNU strptime does not have this problem because it recognizes
     both international and local dates.  */

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* strptime stores only the fields named by the format and may
         consult the existing contents of the structure, so start every
         attempt from a zeroed struct tm.  This keeps stack garbage, and
         fields written by an earlier failed attempt, out of the result.
         It also leaves tm_isdst at 0, which, according to Roger Beeman,
         we need to set ourselves since strptime won't do it.  */
      xzero (t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        return mktime_from_utc (&t);
    }

  /* All formats have failed.  */
  return -1;
}
2414 \f
/* Authorization support: We support two authorization schemes:

   * `Basic' scheme, which sends the USER:PASSWORD string encoded in
   base64;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>, which
   answers the server's challenge with the proper MD5 digests.  */
2422
/* How many bytes it will take to store LEN bytes in base64.  */
#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))

/* Encode the LENGTH bytes starting at S to base64 format and place
   the result in STORE.  STORE will be 0-terminated, and must point to
   a writable buffer of at least 1+BASE64_LENGTH(length) bytes.

   Exactly LENGTH bytes of S are read, so S need not be 0-terminated.
   The input is treated as unsigned bytes, so values above 127 encode
   correctly regardless of whether plain `char' is signed.  */
static void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  const unsigned char *in = (const unsigned char *) s;
  char *out = store;
  int left = length;

  /* Transform each complete 3x8 bits to 4x6 bits, as required by
     base64.  */
  while (left >= 3)
    {
      *out++ = tbl[in[0] >> 2];
      *out++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *out++ = tbl[((in[1] & 0xf) << 2) | (in[2] >> 6)];
      *out++ = tbl[in[2] & 0x3f];
      in += 3;
      left -= 3;
    }

  /* Encode the trailing one or two bytes, treating the missing input
     as zero bits and padding with `=', without reading past the end
     of the input.  */
  if (left == 2)
    {
      *out++ = tbl[in[0] >> 2];
      *out++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *out++ = tbl[(in[1] & 0xf) << 2];
      *out++ = '=';
    }
  else if (left == 1)
    {
      *out++ = tbl[in[0] >> 2];
      *out++ = tbl[(in[0] & 3) << 4];
      *out++ = '=';
      *out++ = '=';
    }

  /* ...and zero-terminate it.  */
  *out = '\0';
}
2463
/* Build the credentials for the `Basic' authentication scheme: the
   string `USER:PASSWD' is base64-encoded and the result is prefixed
   with `Basic '.  The returned string is allocated with xmalloc.  */
static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  int plain_len = strlen (user) + 1 + strlen (passwd);
  int encoded_len = BASE64_LENGTH (plain_len);
  char *plain, *header;

  /* Assemble the clear-text "user:passwd" pair in scratch space.  */
  plain = (char *)alloca (plain_len + 1);
  sprintf (plain, "%s:%s", user, passwd);

  /* "Basic " is six characters; the encoder writes the encoded text
     and the terminating zero directly after it.  */
  header = (char *)xmalloc (6 + encoded_len + 1);
  memcpy (header, "Basic ", 6);
  base64_encode (plain, header + 6, plain_len);

  return header;
}
2485
/* Advance the pointer X past every leading character for which
   ISSPACE holds.  X is modified in place and evaluated more than
   once, so it must be a plain lvalue without side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
2490
2491 #ifdef USE_DIGEST
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of the field.  When the field starts with
   ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
   digest authorization code), its double-quoted value is copied into
   a newly allocated string stored in *RET; whatever *RET pointed to
   before is released first.

   Returns 0 if the field does not start with ATTR_NAME, a negative
   value if it does but is malformed (no `=', no opening or closing
   quote), or otherwise the number of bytes parsed by this call, up to
   and including the closing quote.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  size_t name_len = strlen (attr_name);
  const char *val_beg, *val_end;

  /* Not the attribute we were asked about.  */
  if (strncmp (au, attr_name, name_len) != 0)
    return 0;

  /* Expect: optional whitespace, `=', optional whitespace, `"'.  */
  val_beg = au + name_len;
  if (*val_beg == '\0')
    return -1;
  SKIP_WS (val_beg);
  if (*val_beg != '=')
    return -1;
  ++val_beg;
  if (*val_beg == '\0')
    return -1;
  SKIP_WS (val_beg);
  if (*val_beg != '\"')
    return -1;
  ++val_beg;
  if (*val_beg == '\0')
    return -1;

  /* Locate the closing quote; running off the end is an error.  */
  val_end = val_beg;
  while (*val_end != '\0' && *val_end != '\"')
    ++val_end;
  if (*val_end == '\0')
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (val_beg, val_end);
  return val_end - au + 1;
}
2531
2532 /* Dump the hexadecimal representation of HASH to BUF.  HASH should be
2533    an array of 16 bytes containing the hash keys, and BUF should be a
2534    buffer of 33 writable characters (32 for hex digits plus one for
2535    zero termination).  */
2536 static void
2537 dump_hash (unsigned char *buf, const unsigned char *hash)
2538 {
2539   int i;
2540
2541   for (i = 0; i < MD5_HASHLEN; i++, hash++)
2542     {
2543       *buf++ = XNUM_TO_digit (*hash >> 4);
2544       *buf++ = XNUM_TO_digit (*hash & 0xf);
2545     }
2546   *buf = '\0';
2547 }
2548
2549 /* Take the line apart to find the challenge, and compose a digest
2550    authorization header.  See RFC2069 section 2.1.2.  */
2551 static char *
2552 digest_authentication_encode (const char *au, const char *user,
2553                               const char *passwd, const char *method,
2554                               const char *path)
2555 {
2556   static char *realm, *opaque, *nonce;
2557   static struct {
2558     const char *name;
2559     char **variable;
2560   } options[] = {
2561     { "realm", &realm },
2562     { "opaque", &opaque },
2563     { "nonce", &nonce }
2564   };
2565   char *res;
2566
2567   realm = opaque = nonce = NULL;
2568
2569   au += 6;                      /* skip over `Digest' */
2570   while (*au)
2571     {
2572       int i;
2573
2574       SKIP_WS (au);
2575       for (i = 0; i < countof (options); i++)
2576         {
2577           int skip = extract_header_attr (au, options[i].name,
2578                                           options[i].variable);
2579           if (skip < 0)
2580             {
2581               xfree_null (realm);
2582               xfree_null (opaque);
2583               xfree_null (nonce);
2584               return NULL;
2585             }
2586           else if (skip)
2587             {
2588               au += skip;
2589               break;
2590             }
2591         }
2592       if (i == countof (options))
2593         {
2594           while (*au && *au != '=')
2595             au++;
2596           if (*au && *++au)
2597             {
2598               SKIP_WS (au);
2599               if (*au == '\"')
2600                 {
2601                   au++;
2602                   while (*au && *au != '\"')
2603                     au++;
2604                   if (*au)
2605                     au++;
2606                 }
2607             }
2608         }
2609       while (*au && *au != ',')
2610         au++;
2611       if (*au)
2612         au++;
2613     }
2614   if (!realm || !nonce || !user || !passwd || !path || !method)
2615     {
2616       xfree_null (realm);
2617       xfree_null (opaque);
2618       xfree_null (nonce);
2619       return NULL;
2620     }
2621
2622   /* Calculate the digest value.  */
2623   {
2624     ALLOCA_MD5_CONTEXT (ctx);
2625     unsigned char hash[MD5_HASHLEN];
2626     unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2627     unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2628
2629     /* A1BUF = H(user ":" realm ":" password) */
2630     gen_md5_init (ctx);
2631     gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2632     gen_md5_update ((unsigned char *)":", 1, ctx);
2633     gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2634     gen_md5_update ((unsigned char *)":", 1, ctx);
2635     gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2636     gen_md5_finish (ctx, hash);
2637     dump_hash (a1buf, hash);
2638
2639     /* A2BUF = H(method ":" path) */
2640     gen_md5_init (ctx);
2641     gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2642     gen_md5_update ((unsigned char *)":", 1, ctx);
2643     gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2644     gen_md5_finish (ctx, hash);
2645     dump_hash (a2buf, hash);
2646
2647     /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2648     gen_md5_init (ctx);
2649     gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2650     gen_md5_update ((unsigned char *)":", 1, ctx);
2651     gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2652     gen_md5_update ((unsigned char *)":", 1, ctx);
2653     gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2654     gen_md5_finish (ctx, hash);
2655     dump_hash (response_digest, hash);
2656
2657     res = (char*) xmalloc (strlen (user)
2658                            + strlen (user)
2659                            + strlen (realm)
2660                            + strlen (nonce)
2661                            + strlen (path)
2662                            + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2663                            + (opaque ? strlen (opaque) : 0)
2664                            + 128);
2665     sprintf (res, "Digest \
2666 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2667              user, realm, nonce, path, response_digest);
2668     if (opaque)
2669       {
2670         char *p = res + strlen (res);
2671         strcat (p, ", opaque=\"");
2672         strcat (p, opaque);
2673         strcat (p, "\"");
2674       }
2675   }
2676   return res;
2677 }
2678 #endif /* USE_DIGEST */
2679
2680
/* Non-zero when LINE starts, ignoring case, with the string literal
   STRING_CONSTANT and the character right after that prefix is either
   the end of the string or whitespace.  STRING_CONSTANT must be a
   literal because its length is taken with sizeof.  */
#define BEGINS_WITH(line, string_constant)                                 \
  (strncasecmp (line, string_constant, sizeof (string_constant) - 1) == 0  \
   && (line[sizeof (string_constant) - 1] == '\0'                          \
       || ISSPACE (line[sizeof (string_constant) - 1])))

/* Return 1 if AU names one of the schemes `Basic', `Digest' or
   `NTLM', and 0 otherwise.  */
static int
known_authentication_scheme_p (const char *au)
{
  if (BEGINS_WITH (au, "Basic"))
    return 1;
  if (BEGINS_WITH (au, "Digest"))
    return 1;
  if (BEGINS_WITH (au, "NTLM"))
    return 1;
  return 0;
}

#undef BEGINS_WITH
2695
2696 /* Create the HTTP authorization request header.  When the
2697    `WWW-Authenticate' response header is seen, according to the
2698    authorization scheme specified in that header (`Basic' and `Digest'
2699    are supported by the current implementation), produce an
2700    appropriate HTTP authorization request header.  */
2701 static char *
2702 create_authorization_line (const char *au, const char *user,
2703                            const char *passwd, const char *method,
2704                            const char *path)
2705 {
2706   if (0 == strncasecmp (au, "Basic", 5))
2707     return basic_authentication_encode (user, passwd);
2708 #ifdef USE_DIGEST
2709   if (0 == strncasecmp (au, "Digest", 6))
2710     return digest_authentication_encode (au, user, passwd, method, path);
2711 #endif /* USE_DIGEST */
2712   return NULL;
2713 }
2714 \f
/* Cleanup entry point of the HTTP module.  The body is empty: this
   function takes no arguments, returns nothing and performs no
   work.  */
void
http_cleanup (void)
{
  /* Intentionally empty.  */
}