2 Copyright (C) 1996-2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
55 # include "http-ntlm.h"
68 extern char *version_string;
71 # define MIN(x, y) ((x) > (y) ? (y) : (x))
75 static bool cookies_loaded_p;
76 static struct cookie_jar *wget_cookie_jar;
78 #define TEXTHTML_S "text/html"
79 #define TEXTXHTML_S "application/xhtml+xml"
80 #define TEXTCSS_S "text/css"
82 /* Some status code validation macros.  Each takes a numeric HTTP
   status code X and evaluates to non-zero when X belongs to the class:

     H_20X        - any code from 200 through 299
     H_PARTIAL    - HTTP_STATUS_PARTIAL_CONTENTS (206)
     H_REDIRECTED - 301, 302, 303 or 307; 300 (multiple choices) and
                    304 (not modified) are deliberately not matched.

   H_20X and H_REDIRECTED expand X more than once, so the argument
   must be free of side effects.  */
83 #define H_20X(x) (((x) >= 200) && ((x) < 300))
84 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
85 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
86 || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
87 || (x) == HTTP_STATUS_SEE_OTHER \
88 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
90 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
92 #define HTTP_STATUS_OK 200
93 #define HTTP_STATUS_CREATED 201
94 #define HTTP_STATUS_ACCEPTED 202
95 #define HTTP_STATUS_NO_CONTENT 204
96 #define HTTP_STATUS_PARTIAL_CONTENTS 206
98 /* Redirection 3xx. */
99 #define HTTP_STATUS_MULTIPLE_CHOICES 300
100 #define HTTP_STATUS_MOVED_PERMANENTLY 301
101 #define HTTP_STATUS_MOVED_TEMPORARILY 302
102 #define HTTP_STATUS_SEE_OTHER 303 /* from HTTP/1.1 */
103 #define HTTP_STATUS_NOT_MODIFIED 304
104 #define HTTP_STATUS_TEMPORARY_REDIRECT 307 /* from HTTP/1.1 */
106 /* Client error 4xx. */
107 #define HTTP_STATUS_BAD_REQUEST 400
108 #define HTTP_STATUS_UNAUTHORIZED 401
109 #define HTTP_STATUS_FORBIDDEN 403
110 #define HTTP_STATUS_NOT_FOUND 404
111 #define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
113 /* Server errors 5xx. */
114 #define HTTP_STATUS_INTERNAL 500
115 #define HTTP_STATUS_NOT_IMPLEMENTED 501
116 #define HTTP_STATUS_BAD_GATEWAY 502
117 #define HTTP_STATUS_UNAVAILABLE 503
120 rel_none, rel_name, rel_value, rel_both
127 struct request_header {
129 enum rp release_policy;
131 int hcount, hcapacity;
134 /* Create a new, empty request. At least request_set_method must be
135 called before the request can be used. */
137 static struct request *
140 struct request *req = xnew0 (struct request);
142 req->headers = xnew_array (struct request_header, req->hcapacity);
146 /* Set the request's method and its arguments. METH should be a
147 literal string (or it should outlive the request) because it will
148 not be freed. ARG will be freed by request_free. */
151 request_set_method (struct request *req, const char *meth, char *arg)
157 /* Return the method string passed with the last call to
158 request_set_method. */
161 request_method (const struct request *req)
166 /* Free one header according to the release policy specified with
167 request_set_header. */
170 release_header (struct request_header *hdr)
172 switch (hdr->release_policy)
189 /* Set the request header named NAME to VALUE. Specifically, this means that
190 a "NAME: VALUE\r\n" header line will be used in the request. If a
191 header with the same name previously existed in the request, its
192 value will be replaced by this one. A NULL value means do nothing.
194 RELEASE_POLICY determines whether NAME and VALUE should be released
195 (freed) with request_free. Allowed values are:
197 - rel_none - don't free NAME or VALUE
198 - rel_name - free NAME when done
199 - rel_value - free VALUE when done
200 - rel_both - free both NAME and VALUE when done
202 Setting release policy is useful when arguments come from different
203 sources. For example:
205 // Don't free literal strings!
206 request_set_header (req, "Pragma", "no-cache", rel_none);
208 // Don't free a global variable, we'll need it later.
209 request_set_header (req, "Referer", opt.referer, rel_none);
211 // Value freshly allocated, free it when done.
212 request_set_header (req, "Range",
213 aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
218 request_set_header (struct request *req, char *name, char *value,
219 enum rp release_policy)
221 struct request_header *hdr;
226 /* A NULL value is a no-op; if freeing the name is requested,
227 free it now to avoid leaks. */
228 if (release_policy == rel_name || release_policy == rel_both)
233 for (i = 0; i < req->hcount; i++)
235 hdr = &req->headers[i];
236 if (0 == strcasecmp (name, hdr->name))
238 /* Replace existing header. */
239 release_header (hdr);
242 hdr->release_policy = release_policy;
247 /* Install new header. */
249 if (req->hcount >= req->hcapacity)
251 req->hcapacity <<= 1;
252 req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
254 hdr = &req->headers[req->hcount++];
257 hdr->release_policy = release_policy;
260 /* Like request_set_header, but sets the whole header line, as
261 provided by the user using the `--header' option. For example,
262 request_set_user_header (req, "Foo: bar") works just like
263 request_set_header (req, "Foo", "bar"). */
266 request_set_user_header (struct request *req, const char *header)
269 const char *p = strchr (header, ':');
272 BOUNDED_TO_ALLOCA (header, p, name);
276 request_set_header (req, xstrdup (name), (char *) p, rel_name);
279 /* Remove the header with specified name from REQ. Returns true if
280 the header was actually removed, false otherwise. */
283 request_remove_header (struct request *req, char *name)
286 for (i = 0; i < req->hcount; i++)
288 struct request_header *hdr = &req->headers[i];
289 if (0 == strcasecmp (name, hdr->name))
291 release_header (hdr);
292 /* Move the remaining headers by one. */
293 if (i < req->hcount - 1)
294 memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
302 #define APPEND(p, str) do { \
303 int A_len = strlen (str); \
304 memcpy (p, str, A_len); \
308 /* Construct the request and write it to FD using fd_write. */
311 request_send (const struct request *req, int fd)
313 char *request_string, *p;
314 int i, size, write_error;
316 /* Count the request size. */
319 /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
320 size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
322 for (i = 0; i < req->hcount; i++)
324 struct request_header *hdr = &req->headers[i];
325 /* NAME ": " VALUE "\r\n" */
326 size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
332 p = request_string = alloca_array (char, size);
334 /* Generate the request. */
336 APPEND (p, req->method); *p++ = ' ';
337 APPEND (p, req->arg); *p++ = ' ';
338 memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
340 for (i = 0; i < req->hcount; i++)
342 struct request_header *hdr = &req->headers[i];
343 APPEND (p, hdr->name);
344 *p++ = ':', *p++ = ' ';
345 APPEND (p, hdr->value);
346 *p++ = '\r', *p++ = '\n';
349 *p++ = '\r', *p++ = '\n', *p++ = '\0';
350 assert (p - request_string == size);
354 DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
356 /* Send the request to the server. */
358 write_error = fd_write (fd, request_string, size - 1, -1);
360 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
365 /* Release the resources used by REQ. */
368 request_free (struct request *req)
371 xfree_null (req->arg);
372 for (i = 0; i < req->hcount; i++)
373 release_header (&req->headers[i]);
374 xfree_null (req->headers);
378 /* Send the contents of FILE_NAME to SOCK. Make sure that exactly
379 PROMISED_SIZE bytes are sent over the wire -- if the file is
380 longer, read only that much; if the file is shorter, report an error. */
383 post_file (int sock, const char *file_name, wgint promised_size)
385 static char chunk[8192];
390 DEBUGP (("[writing POST file %s ... ", file_name));
392 fp = fopen (file_name, "rb");
395 while (!feof (fp) && written < promised_size)
398 int length = fread (chunk, 1, sizeof (chunk), fp);
401 towrite = MIN (promised_size - written, length);
402 write_error = fd_write (sock, chunk, towrite, -1);
412 /* If we've written less than was promised, report a (probably
413 nonsensical) error rather than break the promise. */
414 if (written < promised_size)
420 assert (written == promised_size);
421 DEBUGP (("done]\n"));
425 /* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
426 If so, return the pointer to the position after the line, otherwise
427 return NULL. This is used as callback to fd_read_hunk. The data
428 between START and PEEKED has been read and cannot be "unread"; the
429 data after PEEKED has only been peeked. */
432 response_head_terminator (const char *start, const char *peeked, int peeklen)
436 /* If at first peek, verify whether HUNK starts with "HTTP". If
437 not, this is an HTTP/0.9 response and we must bail out without
439 if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
442 /* Look for "\n[\r]\n", and return the following position if found.
443 Start two chars before the current to cover the possibility that
444 part of the terminator (e.g. "\n\r") arrived in the previous
446 p = peeked - start < 2 ? start : peeked - 2;
447 end = peeked + peeklen;
449 /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
450 for (; p < end - 2; p++)
453 if (p[1] == '\r' && p[2] == '\n')
455 else if (p[1] == '\n')
458 /* p==end-2: check for \n\n directly preceding END. */
459 if (p[0] == '\n' && p[1] == '\n')
465 /* The maximum size of a single HTTP response we care to read. Rather
466 than being a limit of the reader implementation, this limit
467 prevents Wget from slurping all available memory upon encountering
468 malicious or buggy server output, thus protecting the user. Define
469 it to 0 to remove the limit. */
471 #define HTTP_RESPONSE_MAX_SIZE 65536
473 /* Read the HTTP response head from FD and return it. The error
474 conditions are the same as with fd_read_hunk.
476 To support HTTP/0.9 responses, this function tries to make sure
477 that the data begins with "HTTP". If this is not the case, no data
478 is read and an empty response head is returned, so that the remaining
479 data can be treated as body. */
482 read_http_response_head (int fd)
484 return fd_read_hunk (fd, response_head_terminator, 512,
485 HTTP_RESPONSE_MAX_SIZE);
489 /* The response data. */
492 /* The array of pointers that indicate where each header starts.
493 For example, given this HTTP response:
500 The headers are located like this:
502 "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
504 headers[0] headers[1] headers[2] headers[3]
506 I.e. headers[0] points to the beginning of the response,
507 headers[1] points to the end of the first header and the
508 beginning of the second one, etc. */
510 const char **headers;
513 /* Create a new response object from the text of the HTTP response,
514 available in HEAD. That text is automatically split into
515 constituent header lines for fast retrieval using
518 static struct response *
519 resp_new (const char *head)
524 struct response *resp = xnew0 (struct response);
529 /* Empty head means that we're dealing with a headerless
530 (HTTP/0.9) response. In that case, don't set HEADERS at
535 /* Split HEAD into header lines, so that resp_header_* functions
536 don't need to do this over and over again. */
542 DO_REALLOC (resp->headers, size, count + 1, const char *);
543 resp->headers[count++] = hdr;
545 /* Break upon encountering an empty line. */
546 if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
549 /* Find the end of HDR, including continuations. */
552 const char *end = strchr (hdr, '\n');
558 while (*hdr == ' ' || *hdr == '\t');
560 DO_REALLOC (resp->headers, size, count + 1, const char *);
561 resp->headers[count] = NULL;
566 /* Locate the header named NAME in the response data, starting with
567 position START. This allows the code to loop through the response
568 data, filtering for all headers of a given name. Returns the
569 found position, or -1 for failure. The code that uses this
570 function typically looks like this:
572 for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
573 ... do something with header ...
575 If you only care about one header, use resp_header_get instead of
579 resp_header_locate (const struct response *resp, const char *name, int start,
580 const char **begptr, const char **endptr)
583 const char **headers = resp->headers;
586 if (!headers || !headers[1])
589 name_len = strlen (name);
595 for (; headers[i + 1]; i++)
597 const char *b = headers[i];
598 const char *e = headers[i + 1];
600 && b[name_len] == ':'
601 && 0 == strncasecmp (b, name, name_len))
604 while (b < e && ISSPACE (*b))
606 while (b < e && ISSPACE (e[-1]))
616 /* Find and retrieve the header named NAME in the response data. If
617 found, set *BEGPTR to its starting, and *ENDPTR to its ending
618 position, and return true. Otherwise return false.
620 This function is used as a building block for resp_header_copy
621 and resp_header_strdup. */
624 resp_header_get (const struct response *resp, const char *name,
625 const char **begptr, const char **endptr)
627 int pos = resp_header_locate (resp, name, 0, begptr, endptr);
631 /* Copy the response header named NAME to buffer BUF, no longer than
632 BUFSIZE (BUFSIZE includes the terminating 0). If the header
633 exists, true is returned, false otherwise. If there should be no
634 limit on the size of the header, use resp_header_strdup instead.
636 If BUFSIZE is 0, no data is copied, but the boolean indication of
637 whether the header is present is still returned. */
640 resp_header_copy (const struct response *resp, const char *name,
641 char *buf, int bufsize)
644 if (!resp_header_get (resp, name, &b, &e))
648 int len = MIN (e - b, bufsize - 1);
649 memcpy (buf, b, len);
655 /* Return the value of header named NAME in RESP, allocated with
656 malloc. If such a header does not exist in RESP, return NULL. */
659 resp_header_strdup (const struct response *resp, const char *name)
662 if (!resp_header_get (resp, name, &b, &e))
664 return strdupdelim (b, e);
667 /* Parse the HTTP status line, which is of format:
669 HTTP-Version SP Status-Code SP Reason-Phrase
671 The function returns the status-code, or -1 if the status line
672 appears malformed. The pointer to "reason-phrase" message is
673 returned in *MESSAGE. */
676 resp_status (const struct response *resp, char **message)
683 /* For a HTTP/0.9 response, assume status 200. */
685 *message = xstrdup (_("No headers, assuming HTTP/0.9"));
689 p = resp->headers[0];
690 end = resp->headers[1];
696 if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
700 /* Match the HTTP version. This is optional because Gnutella
701 servers have been reported to not specify HTTP version. */
702 if (p < end && *p == '/')
705 while (p < end && ISDIGIT (*p))
707 if (p < end && *p == '.')
709 while (p < end && ISDIGIT (*p))
713 while (p < end && ISSPACE (*p))
715 if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
718 status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
723 while (p < end && ISSPACE (*p))
725 while (p < end && ISSPACE (end[-1]))
727 *message = strdupdelim (p, end);
733 /* Release the resources used by RESP. */
736 resp_free (struct response *resp)
738 xfree_null (resp->headers);
742 /* Print the server response, line by line, omitting the trailing CRLF
743 from individual header lines, and prefixed with PREFIX. */
746 print_server_response (const struct response *resp, const char *prefix)
751 for (i = 0; resp->headers[i + 1]; i++)
753 const char *b = resp->headers[i];
754 const char *e = resp->headers[i + 1];
756 if (b < e && e[-1] == '\n')
758 if (b < e && e[-1] == '\r')
760 /* This is safe even on printfs with broken handling of "%.<n>s"
761 because resp->headers ends with \0. */
762 logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
766 /* Parse the `Content-Range' header and extract the information it
767 contains. Returns true if successful, false otherwise. */
769 parse_content_range (const char *hdr, wgint *first_byte_ptr,
770 wgint *last_byte_ptr, wgint *entity_length_ptr)
774 /* Ancient versions of Netscape proxy server, presumably predating
775 rfc2068, sent out `Content-Range' without the "bytes"
777 if (0 == strncasecmp (hdr, "bytes", 5))
780 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
784 while (ISSPACE (*hdr))
791 for (num = 0; ISDIGIT (*hdr); hdr++)
792 num = 10 * num + (*hdr - '0');
793 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
795 *first_byte_ptr = num;
797 for (num = 0; ISDIGIT (*hdr); hdr++)
798 num = 10 * num + (*hdr - '0');
799 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
801 *last_byte_ptr = num;
803 for (num = 0; ISDIGIT (*hdr); hdr++)
804 num = 10 * num + (*hdr - '0');
805 *entity_length_ptr = num;
809 /* Read the body of the response, but don't store it anywhere and don't
810 display a progress gauge. This is useful for reading the bodies of
811 administrative responses to which we will soon issue another
812 request. The response is not useful to the user, but reading it
813 allows us to continue using the same connection to the server.
815 If reading fails, false is returned, true otherwise. In debug
816 mode, the body is displayed for debugging purposes. */
819 skip_short_body (int fd, wgint contlen)
822 SKIP_SIZE = 512, /* size of the download buffer */
823 SKIP_THRESHOLD = 4096 /* the largest size we read */
825 char dlbuf[SKIP_SIZE + 1];
826 dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
828 /* We shouldn't get here with unknown contlen. (This will change
829 with HTTP/1.1, which supports "chunked" transfer.) */
830 assert (contlen != -1);
832 /* If the body is too large, it makes more sense to simply close the
833 connection than to try to read the body. */
834 if (contlen > SKIP_THRESHOLD)
837 DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
841 int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
844 /* Don't normally report the error since this is an
845 optimization that should be invisible to the user. */
846 DEBUGP (("] aborting (%s).\n",
847 ret < 0 ? fd_errstr (fd) : "EOF received"));
851 /* Safe even if %.*s bogusly expects terminating \0 because
852 we've zero-terminated dlbuf above. */
853 DEBUGP (("%.*s", ret, dlbuf));
856 DEBUGP (("] done.\n"));
860 /* Extract a parameter from the string (typically an HTTP header) at
861 **SOURCE and advance SOURCE to the next parameter. Return false
862 when there are no more parameters to extract. The name of the
863 parameter is returned in NAME, and the value in VALUE. If the
864 parameter has no value, the token's value is zeroed out.
866 For example, if *SOURCE points to the string "attachment;
867 filename=\"foo bar\"", the first call to this function will return
868 the token named "attachment" and no value, and the second call will
869 return the token named "filename" and value "foo bar". The third
870 call will return false, indicating no more valid tokens. */
873 extract_param (const char **source, param_token *name, param_token *value,
876 const char *p = *source;
878 while (ISSPACE (*p)) ++p;
882 return false; /* no error; nothing more to extract */
887 while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
889 if (name->b == name->e)
890 return false; /* empty name: error */
891 while (ISSPACE (*p)) ++p;
892 if (*p == separator || !*p) /* no value */
895 if (*p == separator) ++p;
900 return false; /* error */
902 /* *p is '=', extract value */
904 while (ISSPACE (*p)) ++p;
905 if (*p == '"') /* quoted */
908 while (*p && *p != '"') ++p;
912 /* Currently at closing quote; find the end of param. */
913 while (ISSPACE (*p)) ++p;
914 while (*p && *p != separator) ++p;
918 /* garbage after closed quote, e.g. foo="bar"baz */
924 while (*p && *p != separator) ++p;
926 while (value->e != value->b && ISSPACE (value->e[-1]))
928 if (*p == separator) ++p;
935 #define MAX(p, q) ((p) > (q) ? (p) : (q))
937 /* Parse the contents of the `Content-Disposition' header, extracting
938 the information useful to Wget. Content-Disposition is a header
939 borrowed from MIME; when used in HTTP, it typically serves for
940 specifying the desired file name of the resource. For example:
942 Content-Disposition: attachment; filename="flora.jpg"
944 Wget will skip the tokens it doesn't care about, such as
945 "attachment" in the previous example; it will also skip other
946 unrecognized params. If the header is syntactically correct and
947 contains a file name, a copy of the file name is stored in
948 *filename and true is returned. Otherwise, the function returns
951 The file name is stripped of directory components and must not be
955 parse_content_disposition (const char *hdr, char **filename)
957 param_token name, value;
958 while (extract_param (&hdr, &name, &value, ';'))
959 if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
961 /* Make the file name begin at the last slash or backslash. */
962 const char *last_slash = memrchr (value.b, '/', value.e - value.b);
963 const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
964 if (last_slash && last_bs)
965 value.b = 1 + MAX (last_slash, last_bs);
966 else if (last_slash || last_bs)
967 value.b = 1 + (last_slash ? last_slash : last_bs);
968 if (value.b == value.e)
970 *filename = strdupdelim (value.b, value.e);
976 /* Persistent connections. Currently, we cache the most recently used
977 connection as persistent, provided that the HTTP server agrees to
978 make it such. The persistence data is stored in the variables
979 below. Ideally, it should be possible to cache an arbitrary fixed
980 number of these connections. */
982 /* Whether a persistent connection is active. */
983 static bool pconn_active;
986 /* The socket of the connection. */
989 /* Host and port of the currently active persistent connection. */
993 /* Whether an SSL handshake has occurred on this connection. */
996 /* Whether the connection was authorized. This is only done by
997 NTLM, which authorizes *connections* rather than individual
998 requests. (That practice is peculiar for HTTP, but it is a
999 useful optimization.) */
1003 /* NTLM data of the current connection. */
1004 struct ntlmdata ntlm;
1008 /* Mark the persistent connection as invalid and free the resources it
1009 uses. This is used by the CLOSE_* macros after they forcefully
1010 close a registered persistent connection. */
1013 invalidate_persistent (void)
1015 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
1016 pconn_active = false;
1017 fd_close (pconn.socket);
1022 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
1023 persistent. This will enable someone to use the same connection
1024 later. In the context of HTTP, this must be called only AFTER the
1025 response has been received and the server has promised that the
1026 connection will remain alive.
1028 If a previous connection was persistent, it is closed. */
1031 register_persistent (const char *host, int port, int fd, bool ssl)
1035 if (pconn.socket == fd)
1037 /* The connection FD is already registered. */
1042 /* The old persistent connection is still active; close it
1043 first. This situation arises whenever a persistent
1044 connection exists, but we then connect to a different
1045 host, and try to register a persistent connection to that
1047 invalidate_persistent ();
1051 pconn_active = true;
1053 pconn.host = xstrdup (host);
1056 pconn.authorized = false;
1058 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
1061 /* Return true if a persistent connection is available for connecting
1065 persistent_available_p (const char *host, int port, bool ssl,
1066 bool *host_lookup_failed)
1068 /* First, check whether a persistent connection is active at all. */
1072 /* If we want SSL and the last connection wasn't or vice versa,
1073 don't use it. Checking for host and port is not enough because
1074 HTTP and HTTPS can apparently coexist on the same port. */
1075 if (ssl != pconn.ssl)
1078 /* If we're not connecting to the same port, we're not interested. */
1079 if (port != pconn.port)
1082 /* If the host is the same, we're in business. If not, there is
1083 still hope -- read below. */
1084 if (0 != strcasecmp (host, pconn.host))
1086 /* Check if pconn.socket is talking to HOST under another name.
1087 This happens often when both sites are virtual hosts
1088 distinguished only by name and served by the same network
1089 interface, and hence the same web server (possibly set up by
1090 the ISP and serving many different web sites). This
1091 admittedly unconventional optimization does not contradict
1092 HTTP and works well with popular server software. */
1096 struct address_list *al;
1099 /* Don't try to talk to two different SSL sites over the same
1100 secure connection! (Besides, it's not clear that
1101 name-based virtual hosting is even possible with SSL.) */
1104 /* If pconn.socket's peer is one of the IP addresses HOST
1105 resolves to, pconn.socket is for all intents and purposes
1106 already talking to HOST. */
1108 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
1110 /* Can't get the peer's address -- something must be very
1111 wrong with the connection. */
1112 invalidate_persistent ();
1115 al = lookup_host (host, 0);
1118 *host_lookup_failed = true;
1122 found = address_list_contains (al, &ip);
1123 address_list_release (al);
1128 /* The persistent connection's peer address was found among the
1129 addresses HOST resolved to; therefore, pconn.sock is in fact
1130 already talking to HOST -- no need to reconnect. */
1133 /* Finally, check whether the connection is still open. This is
1134 important because most servers implement liberal (short) timeout
1135 on persistent connections. Wget can of course always reconnect
1136 if the connection doesn't work out, but it's nicer to know in
1137 advance. This test is a logical followup of the first test, but
1138 is "expensive" and therefore placed at the end of the list.
1140 (Current implementation of test_socket_open has a nice side
1141 effect that it treats sockets with pending data as "closed".
1142 This is exactly what we want: if a broken server sends message
1143 body in response to HEAD, or if it sends more than content-length
1144 data, we won't reuse the corrupted connection.) */
1146 if (!test_socket_open (pconn.socket))
1148 /* Oops, the socket is no longer open. Now that we know that,
1149 let's invalidate the persistent connection before returning
1151 invalidate_persistent ();
1158 /* The idea behind these two CLOSE macros is to distinguish between
1159 two cases: one when the job we've been doing is finished, and we
1160 want to close the connection and leave, and two when something is
1161 seriously wrong and we're closing the connection as part of
1164 In case of keep_alive, CLOSE_FINISH should leave the connection
1165 open, while CLOSE_INVALIDATE should still close it.
1167 Note that the semantics of the flag `keep_alive' is "this
1168 connection *will* be reused (the server has promised not to close
1169 the connection once we're done)", while the semantics of
1170 `pconn_active && (fd) == pconn.socket' is "we're *now* using an
1171 active, registered connection". */
1173 #define CLOSE_FINISH(fd) do { \
1176 if (pconn_active && (fd) == pconn.socket) \
1177 invalidate_persistent (); \
1186 #define CLOSE_INVALIDATE(fd) do { \
1187 if (pconn_active && (fd) == pconn.socket) \
1188 invalidate_persistent (); \
1196 wgint len; /* received length */
1197 wgint contlen; /* expected length */
1198 wgint restval; /* the restart value */
1199 int res; /* the result of last read */
1200 char *rderrmsg; /* error message from read error */
1201 char *newloc; /* new location (redirection) */
1202 char *remote_time; /* remote time-stamp string */
1203 char *error; /* textual HTTP error */
1204 int statcode; /* status code */
1205 wgint rd_size; /* amount of data read from socket */
1206 double dltime; /* time it took to download the data */
1207 const char *referer; /* value of the referer header. */
1208 char *local_file; /* local file name. */
1209 bool timestamp_checked; /* true if pre-download time-stamping checks
1210 * have already been performed */
1211 char *orig_file_name; /* name of file to compare for time-stamping
1212 * (might be != local_file if -K is set) */
1213 wgint orig_file_size; /* size of file to compare for time-stamping */
1214 time_t orig_file_tstamp; /* time-stamp of file to compare for
1219 free_hstat (struct http_stat *hs)
1221 xfree_null (hs->newloc);
1222 xfree_null (hs->remote_time);
1223 xfree_null (hs->error);
1224 xfree_null (hs->rderrmsg);
1225 xfree_null (hs->local_file);
1226 xfree_null (hs->orig_file_name);
1228 /* Guard against being called twice. */
1230 hs->remote_time = NULL;
1234 static char *create_authorization_line (const char *, const char *,
1235 const char *, const char *,
1236 const char *, bool *);
1237 static char *basic_authentication_encode (const char *, const char *);
1238 static bool known_authentication_scheme_p (const char *, const char *);
1239 static void ensure_extension (struct http_stat *, const char *, int *);
1240 static void load_cookies (void);
/* Evaluate to non-zero if LINE begins with STRING_CONSTANT, compared
   case-insensitively, and the matched prefix is immediately followed
   by a character accepted by ISSPACE or by the terminating '\0'.

   The prefix length is computed with sizeof, so STRING_CONSTANT must
   be a string literal (or char array), not a char pointer.  LINE is
   expanded several times and must be free of side effects.  */
1242 #define BEGINS_WITH(line, string_constant) \
1243 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
1244 && (ISSPACE (line[sizeof (string_constant) - 1]) \
1245 || !line[sizeof (string_constant) - 1]))
1247 #define SET_USER_AGENT(req) do { \
1248 if (!opt.useragent) \
1249 request_set_header (req, "User-Agent", \
1250 aprintf ("Wget/%s", version_string), rel_value); \
1251 else if (*opt.useragent) \
1252 request_set_header (req, "User-Agent", opt.useragent, rel_none); \
1255 /* The flags that allow clobbering the file (opening with "wb").
1256 Defined here to avoid repetition later. #### This will require
1258 #define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
1259 || opt.dirstruct || opt.output_document)
1261 /* Retrieve a document through HTTP protocol. It recognizes status
1262 code, and correctly handles redirections. It closes the network
1263 socket. If it receives an error from the functions below it, it
1264 will print it if there is enough information to do so (almost
1265 always), returning the error to the caller (i.e. http_loop).
1267 Various HTTP parameters are stored to hs.
1269 If PROXY is non-NULL, the connection will be made to the proxy
1270 server, and u->url will be requested. */
1272 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1274 struct request *req;
1277 char *user, *passwd;
1281 wgint contlen, contrange;
1288 /* Set to true when the authorization has failed permanently and should
1289 not be tried again. */
1290 bool auth_finished = false;
1292 /* Whether NTLM authentication is used for this request. */
1293 bool ntlm_seen = false;
1295 /* Whether our connection to the remote host is through SSL. */
1296 bool using_ssl = false;
1298 /* Whether a HEAD request will be issued (as opposed to GET or
1300 bool head_only = !!(*dt & HEAD_ONLY);
1303 struct response *resp;
1307 /* Whether this connection will be kept alive after the HTTP request
1311 /* Whether keep-alive should be inhibited.
1313 RFC 2068 requests that 1.0 clients not send keep-alive requests
1314 to proxies. This is because many 1.0 proxies do not interpret
1315 the Connection header and transfer it to the remote server,
1316 causing it to not close the connection and leave both the proxy
1317 and the client hanging. */
1318 bool inhibit_keep_alive =
1319 !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1321 /* Headers sent when using POST. */
1322 wgint post_data_size = 0;
1324 bool host_lookup_failed = false;
1327 if (u->scheme == SCHEME_HTTPS)
1329 /* Initialize the SSL context. After this has once been done,
1330 it becomes a no-op. */
1333 scheme_disable (SCHEME_HTTPS);
1334 logprintf (LOG_NOTQUIET,
1335 _("Disabling SSL due to encountered errors.\n"));
1336 return SSLINITFAILED;
1339 #endif /* HAVE_SSL */
1341 /* Initialize certain elements of struct http_stat. */
1345 hs->rderrmsg = NULL;
1347 hs->remote_time = NULL;
1352 /* Prepare the request to send. */
1354 req = request_new ();
1357 const char *meth = "GET";
1360 else if (opt.post_file_name || opt.post_data)
1362 /* Use the full path, i.e. one that includes the leading slash and
1363 the query string. E.g. if u->path is "foo/bar" and u->query is
1364 "param=value", full_path will be "/foo/bar?param=value". */
1367 /* When using SSL over proxy, CONNECT establishes a direct
1368 connection to the HTTPS server. Therefore use the same
1369 argument as when talking to the server directly. */
1370 && u->scheme != SCHEME_HTTPS
1373 meth_arg = xstrdup (u->url);
1375 meth_arg = url_full_path (u);
1376 request_set_method (req, meth, meth_arg);
1379 request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1380 if (*dt & SEND_NOCACHE)
1381 request_set_header (req, "Pragma", "no-cache", rel_none);
1383 request_set_header (req, "Range",
1384 aprintf ("bytes=%s-",
1385 number_to_static_string (hs->restval)),
1387 SET_USER_AGENT (req);
1388 request_set_header (req, "Accept", "*/*", rel_none);
1390 /* Find the username and password for authentication. */
1393 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1394 user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1395 passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1399 /* We have the username and the password, but haven't tried
1400 any authorization yet. Let's see if the "Basic" method
1401 works. If not, we'll come back here and construct a
1402 proper authorization method with the right challenges.
1404 If we didn't employ this kind of logic, every URL that
1405 requires authorization would have to be processed twice,
1406 which is very suboptimal and generates a bunch of false
1407 "unauthorized" errors in the server log.
1409 #### But this logic also has a serious problem when used
1410 with stronger authentications: we *first* transmit the
1411 username and the password in clear text, and *then* attempt a
1412 stronger authentication scheme. That cannot be right! We
1413 are only fortunate that almost everyone still uses the
1414 `Basic' scheme anyway.
1416 There should be an option to prevent this from happening, for
1417 those who use strong authentication schemes and value their
1419 request_set_header (req, "Authorization",
1420 basic_authentication_encode (user, passwd),
1427 char *proxy_user, *proxy_passwd;
1428 /* For normal username and password, URL components override
1429 command-line/wgetrc parameters. With proxy
1430 authentication, it's the reverse, because proxy URLs are
1431 normally the "permanent" ones, so command-line args
1432 should take precedence. */
1433 if (opt.proxy_user && opt.proxy_passwd)
1435 proxy_user = opt.proxy_user;
1436 proxy_passwd = opt.proxy_passwd;
1440 proxy_user = proxy->user;
1441 proxy_passwd = proxy->passwd;
1443 /* #### This does not appear right. Can't the proxy request,
1444 say, `Digest' authentication? */
1445 if (proxy_user && proxy_passwd)
1446 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1448 /* If we're using a proxy, we will be connecting to the proxy
1452 /* Proxy authorization over SSL is handled below. */
1454 if (u->scheme != SCHEME_HTTPS)
1456 request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1459 /* Generate the Host header, HOST:PORT. Take into account that:
1461 - Broken server-side software often doesn't recognize the PORT
1462 argument, so we must generate "Host: www.server.com" instead of
1463 "Host: www.server.com:80" (and likewise for https port).
1465 - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
1466 becomes ambiguous and needs to be rewritten as "Host:
1467 [3ffe:8100:200:2::2]:1234". */
1469 /* Formats arranged for hfmt[add_port][add_squares]. */
1470 static const char *hfmt[][2] = {
1471 { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
1473 int add_port = u->port != scheme_default_port (u->scheme);
1474 int add_squares = strchr (u->host, ':') != NULL;
1475 request_set_header (req, "Host",
1476 aprintf (hfmt[add_port][add_squares], u->host, u->port),
1480 if (!inhibit_keep_alive)
1481 request_set_header (req, "Connection", "Keep-Alive", rel_none);
1484 request_set_header (req, "Cookie",
1485 cookie_header (wget_cookie_jar,
1486 u->host, u->port, u->path,
1488 u->scheme == SCHEME_HTTPS
1495 if (opt.post_data || opt.post_file_name)
1497 request_set_header (req, "Content-Type",
1498 "application/x-www-form-urlencoded", rel_none);
1500 post_data_size = strlen (opt.post_data);
1503 post_data_size = file_size (opt.post_file_name);
1504 if (post_data_size == -1)
1506 logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
1507 opt.post_file_name, strerror (errno));
1511 request_set_header (req, "Content-Length",
1512 xstrdup (number_to_static_string (post_data_size)),
1516 /* Add the user headers. */
1517 if (opt.user_headers)
1520 for (i = 0; opt.user_headers[i]; i++)
1521 request_set_user_header (req, opt.user_headers[i]);
1525 /* We need to come back here when the initial attempt to retrieve
1526 without authorization header fails. (Expected to happen at least
1527 for the Digest authorization scheme.) */
1531 /* Establish the connection. */
1533 if (!inhibit_keep_alive)
1535 /* Look for a persistent connection to target host, unless a
1536 proxy is used. The exception is when SSL is in use, in which
1537 case the proxy is nothing but a passthrough to the target
1538 host, registered as a connection to the latter. */
1539 struct url *relevant = conn;
1541 if (u->scheme == SCHEME_HTTPS)
1545 if (persistent_available_p (relevant->host, relevant->port,
1547 relevant->scheme == SCHEME_HTTPS,
1551 &host_lookup_failed))
1553 sock = pconn.socket;
1554 using_ssl = pconn.ssl;
1555 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1556 escnonprint (pconn.host), pconn.port);
1557 DEBUGP (("Reusing fd %d.\n", sock));
1558 if (pconn.authorized)
1559 /* If the connection is already authorized, the "Basic"
1560 authorization added by code above is unnecessary and
1562 request_remove_header (req, "Authorization");
1568 /* In its current implementation, persistent_available_p will
1569 look up conn->host in some cases. If that lookup failed, we
1570 don't need to bother with connect_to_host. */
1571 if (host_lookup_failed)
1577 sock = connect_to_host (conn->host, conn->port);
1586 return (retryable_socket_connect_error (errno)
1587 ? CONERROR : CONIMPOSSIBLE);
1591 if (proxy && u->scheme == SCHEME_HTTPS)
1593 /* When requesting SSL URLs through proxies, use the
1594 CONNECT method to request passthrough. */
1595 struct request *connreq = request_new ();
1596 request_set_method (connreq, "CONNECT",
1597 aprintf ("%s:%d", u->host, u->port));
1598 SET_USER_AGENT (connreq);
1601 request_set_header (connreq, "Proxy-Authorization",
1602 proxyauth, rel_value);
1603 /* Now that PROXYAUTH is part of the CONNECT request,
1604 zero it out so we don't send proxy authorization with
1605 the regular request below. */
1608 /* Examples in rfc2817 use the Host header in CONNECT
1609 requests. I don't see how that gains anything, given
1610 that the contents of Host would be exactly the same as
1611 the contents of CONNECT. */
1613 write_error = request_send (connreq, sock);
1614 request_free (connreq);
1615 if (write_error < 0)
1617 CLOSE_INVALIDATE (sock);
1621 head = read_http_response_head (sock);
1624 logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1626 CLOSE_INVALIDATE (sock);
1635 DEBUGP (("proxy responded with: [%s]\n", head));
1637 resp = resp_new (head);
1638 statcode = resp_status (resp, &message);
1641 if (statcode != 200)
1644 logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1645 message ? escnonprint (message) : "?");
1646 xfree_null (message);
1649 xfree_null (message);
1651 /* SOCK is now *really* connected to u->host, so update CONN
1652 to reflect this. That way register_persistent will
1653 register SOCK as being connected to u->host:u->port. */
1657 if (conn->scheme == SCHEME_HTTPS)
1659 if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1666 #endif /* HAVE_SSL */
1669 /* Send the request to server. */
1670 write_error = request_send (req, sock);
1672 if (write_error >= 0)
1676 DEBUGP (("[POST data: %s]\n", opt.post_data));
1677 write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1679 else if (opt.post_file_name && post_data_size != 0)
1680 write_error = post_file (sock, opt.post_file_name, post_data_size);
1683 if (write_error < 0)
1685 CLOSE_INVALIDATE (sock);
1689 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1690 proxy ? "Proxy" : "HTTP");
1695 head = read_http_response_head (sock);
1700 logputs (LOG_NOTQUIET, _("No data received.\n"));
1701 CLOSE_INVALIDATE (sock);
1707 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1709 CLOSE_INVALIDATE (sock);
1714 DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1716 resp = resp_new (head);
1718 /* Check for status line. */
1720 statcode = resp_status (resp, &message);
1721 if (!opt.server_response)
1722 logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1723 message ? escnonprint (message) : "");
1726 logprintf (LOG_VERBOSE, "\n");
1727 print_server_response (resp, " ");
1730 /* Determine the local filename if needed. Notice that if -O is used
1731 * hstat.local_file is set by http_loop to the argument of -O. */
1732 if (!hs->local_file)
1734 /* Honor Content-Disposition whether possible. */
1735 if (!opt.content_disposition
1736 || !resp_header_copy (resp, "Content-Disposition",
1737 hdrval, sizeof (hdrval))
1738 || !parse_content_disposition (hdrval, &hs->local_file))
1740 /* The Content-Disposition header is missing or broken.
1741 * Choose unique file name according to given URL. */
1742 hs->local_file = url_file_name (u);
1746 /* TODO: perform this check only once. */
1747 if (file_exists_p (hs->local_file))
1751 /* If opt.noclobber is turned on and file already exists, do not
1752 retrieve the file */
1753 logprintf (LOG_VERBOSE, _("\
1754 File `%s' already there; not retrieving.\n\n"), hs->local_file);
1755 /* If the file is there, we suppose it's retrieved OK. */
1758 /* #### Bogusness alert. */
1759 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1760 if (has_html_suffix_p (hs->local_file))
1765 else if (!ALLOW_CLOBBER)
1767 char *unique = unique_name (hs->local_file, true);
1768 if (unique != hs->local_file)
1769 xfree (hs->local_file);
1770 hs->local_file = unique;
1774 /* Support timestamping */
1775 /* TODO: move this code out of gethttp. */
1776 if (opt.timestamping && !hs->timestamp_checked)
1778 size_t filename_len = strlen (hs->local_file);
1779 char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1780 bool local_dot_orig_file_exists = false;
1781 char *local_filename = NULL;
1784 if (opt.backup_converted)
1785 /* If -K is specified, we'll act on the assumption that it was specified
1786 last time these files were downloaded as well, and instead of just
1787 comparing local file X against server file X, we'll compare local
1788 file X.orig (if extant, else X) against server file X. If -K
1789 _wasn't_ specified last time, or the server contains files called
1790 *.orig, -N will be back to not operating correctly with -k. */
1792 /* Would a single s[n]printf() call be faster? --dan
1794 Definitely not. sprintf() is horribly slow. It's a
1795 different question whether the difference between the two
1796 affects a program. Usually I'd say "no", but at one
1797 point I profiled Wget, and found that a measurable and
1798 non-negligible amount of time was lost calling sprintf()
1799 in url.c. Replacing sprintf with inline calls to
1800 strcpy() and number_to_string() made a difference.
1802 memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
1803 memcpy (filename_plus_orig_suffix + filename_len,
1804 ".orig", sizeof (".orig"));
1806 /* Try to stat() the .orig file. */
1807 if (stat (filename_plus_orig_suffix, &st) == 0)
1809 local_dot_orig_file_exists = true;
1810 local_filename = filename_plus_orig_suffix;
1814 if (!local_dot_orig_file_exists)
1815 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1816 if (stat (hs->local_file, &st) == 0)
1817 local_filename = hs->local_file;
1819 if (local_filename != NULL)
1820 /* There was a local file, so we'll check later to see if the version
1821 the server has is the same version we already have, allowing us to
1824 hs->orig_file_name = xstrdup (local_filename);
1825 hs->orig_file_size = st.st_size;
1826 hs->orig_file_tstamp = st.st_mtime;
1828 /* Modification time granularity is 2 seconds for Windows, so
1829 increase local time by 1 second for later comparison. */
1830 ++hs->orig_file_tstamp;
1835 if (!opt.ignore_length
1836 && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1840 parsed = str_to_wgint (hdrval, NULL, 10);
1841 if (parsed == WGINT_MAX && errno == ERANGE)
1843 #### If Content-Length is out of range, it most likely
1844 means that the file is larger than 2G and that we're
1845 compiled without LFS. In that case we should probably
1846 refuse to even attempt to download the file. */
1852 /* Check for keep-alive related responses. */
1853 if (!inhibit_keep_alive && contlen != -1)
1855 if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1857 else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1859 if (0 == strcasecmp (hdrval, "Keep-Alive"))
1864 /* The server has promised that it will not close the connection
1865 when we're done. This means that we can register it. */
1866 register_persistent (conn->host, conn->port, sock, using_ssl);
1868 if (statcode == HTTP_STATUS_UNAUTHORIZED)
1870 /* Authorization is required. */
1871 if (keep_alive && !head_only && skip_short_body (sock, contlen))
1872 CLOSE_FINISH (sock);
1874 CLOSE_INVALIDATE (sock);
1875 pconn.authorized = false;
1876 if (!auth_finished && (user && passwd))
1878 /* IIS sends multiple copies of WWW-Authenticate, one with
1879 the value "negotiate", and other(s) with data. Loop over
1880 all the occurrences and pick the one we recognize. */
1882 const char *wabeg, *waend;
1883 char *www_authenticate = NULL;
1885 (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1886 &wabeg, &waend)) != -1;
1888 if (known_authentication_scheme_p (wabeg, waend))
1890 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1894 if (!www_authenticate)
1895 /* If the authentication header is missing or
1896 unrecognized, there's no sense in retrying. */
1897 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1898 else if (BEGINS_WITH (www_authenticate, "Basic"))
1899 /* If the authentication scheme is "Basic", which we send
1900 by default, there's no sense in retrying either. (This
1901 should be changed when we stop sending "Basic" data by
1907 pth = url_full_path (u);
1908 request_set_header (req, "Authorization",
1909 create_authorization_line (www_authenticate,
1911 request_method (req),
1915 if (BEGINS_WITH (www_authenticate, "NTLM"))
1918 goto retry_with_auth;
1921 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1925 else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1927 /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1929 pconn.authorized = true;
1933 hs->statcode = statcode;
1935 hs->error = xstrdup (_("Malformed status line"));
1937 hs->error = xstrdup (_("(no description)"));
1939 hs->error = xstrdup (message);
1940 xfree_null (message);
1942 type = resp_header_strdup (resp, "Content-Type");
1945 char *tmp = strchr (type, ';');
1948 while (tmp > type && ISSPACE (tmp[-1]))
1953 hs->newloc = resp_header_strdup (resp, "Location");
1954 hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1956 /* Handle (possibly multiple instances of) the Set-Cookie header. */
1960 const char *scbeg, *scend;
1961 /* The jar should have been created by now. */
1962 assert (wget_cookie_jar != NULL);
1964 (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1965 &scbeg, &scend)) != -1;
1968 char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1969 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
1970 u->path, set_cookie);
1974 if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1976 wgint first_byte_pos, last_byte_pos, entity_length;
1977 if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1979 contrange = first_byte_pos;
1983 /* 20x responses are counted among successful by default. */
1984 if (H_20X (statcode))
1987 /* Return if redirected. */
1988 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1990 /* RFC2068 says that in case of the 300 (multiple choices)
1991 response, the server can output a preferred URL through
1992 `Location' header; otherwise, the request should be treated
1993 like GET. So, if the location is set, it will be a
1994 redirection; otherwise, just proceed normally. */
1995 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1999 logprintf (LOG_VERBOSE,
2000 _("Location: %s%s\n"),
2001 hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
2002 hs->newloc ? _(" [following]") : "");
2003 if (keep_alive && !head_only && skip_short_body (sock, contlen))
2004 CLOSE_FINISH (sock);
2006 CLOSE_INVALIDATE (sock);
2012 /* If content-type is not given, assume text/html. This is because
2013 of the multitude of broken CGI's that "forget" to generate the
2016 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
2017 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
2023 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
2028 if (opt.html_extension)
2031 /* -E / --html-extension / html_extension = on was specified,
2032 and this is a text/html file. If some case-insensitive
2033 variation on ".htm[l]" isn't already the file's suffix,
2036 ensure_extension (hs, ".html", dt);
2038 else if (*dt & TEXTCSS)
2040 ensure_extension (hs, ".css", dt);
2044 if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
2046 /* If `-c' is in use and the file has been fully downloaded (or
2047 the remote file has shrunk), Wget effectively requests bytes
2048 after the end of file and the server response with 416. */
2049 logputs (LOG_VERBOSE, _("\
2050 \n The file is already fully retrieved; nothing to do.\n\n"));
2051 /* In case the caller inspects. */
2054 /* Mark as successfully retrieved. */
2057 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
2058 might be more bytes in the body. */
2059 return RETRUNNEEDED;
2061 if ((contrange != 0 && contrange != hs->restval)
2062 || (H_PARTIAL (statcode) && !contrange))
2064 /* The Range request was somehow misunderstood by the server.
2067 CLOSE_INVALIDATE (sock);
2070 hs->contlen = contlen + contrange;
2076 /* No need to print this output if the body won't be
2077 downloaded at all, or if the original server response is
2079 logputs (LOG_VERBOSE, _("Length: "));
2082 logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
2083 if (contlen + contrange >= 1024)
2084 logprintf (LOG_VERBOSE, " (%s)",
2085 human_readable (contlen + contrange));
2088 if (contlen >= 1024)
2089 logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
2090 number_to_static_string (contlen),
2091 human_readable (contlen));
2093 logprintf (LOG_VERBOSE, _(", %s remaining"),
2094 number_to_static_string (contlen));
2098 logputs (LOG_VERBOSE,
2099 opt.ignore_length ? _("ignored") : _("unspecified"));
2101 logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
2103 logputs (LOG_VERBOSE, "\n");
2107 type = NULL; /* We don't need it any more. */
2109 /* Return if we have no intention of further downloading. */
2110 if (!(*dt & RETROKF) || head_only)
2112 /* In case the caller cares to look... */
2117 /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
2118 servers not to send body in response to a HEAD request, and
2119 those that do will likely be caught by test_socket_open.
2120 If not, they can be worked around using
2121 `--no-http-keep-alive'. */
2122 CLOSE_FINISH (sock);
2123 else if (keep_alive && skip_short_body (sock, contlen))
2124 /* Successfully skipped the body; also keep using the socket. */
2125 CLOSE_FINISH (sock);
2127 CLOSE_INVALIDATE (sock);
2128 return RETRFINISHED;
2131 /* Open the local file. */
2134 mkalldirs (hs->local_file);
2136 rotate_backups (hs->local_file);
2138 fp = fopen (hs->local_file, "ab");
2139 else if (ALLOW_CLOBBER)
2140 fp = fopen (hs->local_file, "wb");
2143 fp = fopen_excl (hs->local_file, true);
2144 if (!fp && errno == EEXIST)
2146 /* We cannot just invent a new name and use it (which is
2147 what functions like unique_create typically do)
2148 because we told the user we'd use this name.
2149 Instead, return and retry the download. */
2150 logprintf (LOG_NOTQUIET,
2151 _("%s has sprung into existence.\n"),
2153 CLOSE_INVALIDATE (sock);
2154 return FOPEN_EXCL_ERR;
2159 logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
2160 CLOSE_INVALIDATE (sock);
2167 /* Print fetch message, if opt.verbose. */
2170 logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
2171 HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
2174 /* This confuses the timestamping code that checks for file size.
2175 #### The timestamping code should be smarter about file size. */
2176 if (opt.save_headers && hs->restval == 0)
2177 fwrite (head, 1, strlen (head), fp);
2179 /* Now we no longer need to store the response header. */
2182 /* Download the request body. */
2185 /* If content-length is present, read that much; otherwise, read
2186 until EOF. The HTTP spec doesn't require the server to
2187 actually close the connection when it's done sending data. */
2188 flags |= rb_read_exactly;
2189 if (hs->restval > 0 && contrange == 0)
2190 /* If the server ignored our range request, instruct fd_read_body
2191 to skip the first RESTVAL bytes of body. */
2192 flags |= rb_skip_startpos;
2193 hs->len = hs->restval;
2195 hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
2196 hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
2200 CLOSE_FINISH (sock);
2204 hs->rderrmsg = xstrdup (fd_errstr (sock));
2205 CLOSE_INVALIDATE (sock);
2212 return RETRFINISHED;
2215 /* The genuine HTTP loop! This is the part where the retrieval is
2216 retried, and retried, and retried, and...

   U is the URL to fetch and PROXY the proxy to use; both are handed
   unchanged to gethttp on every pass.  REFERER is stored in the
   per-download struct http_stat.  *DT is updated on each pass
   (SEND_NOCACHE is set or cleared here) and tested for RETROKF and
   ADDED_HTML_EXTENSION after gethttp returns.

   NEWLOC receives an xstrdup'd copy of the location reported by
   gethttp.  LOCAL_FILE, if non-NULL, must point to a NULL pointer on
   entry (this is asserted) and receives an xstrdup'd file name.

   The do/while loop repeats while COUNT is below opt.ntry; an
   opt.ntry of zero means no limit.  RET starts out as TRYLIMEXC.  */
2218 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
2219 int *dt, struct url *proxy)
2222 bool got_head = false; /* used for time-stamping and filename detection */
2223 bool got_name = false;
2226 uerr_t err, ret = TRYLIMEXC;
2227 time_t tmr = -1; /* remote time-stamp */
2228 wgint local_size = 0; /* the size of the local file */
2229 struct http_stat hstat; /* HTTP status */
2232 /* Assert that no value for *LOCAL_FILE was passed. */
2233 assert (local_file == NULL || *local_file == NULL);
2235 /* Set LOCAL_FILE parameter. */
2236 if (local_file && opt.output_document)
2237 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2239 /* Reset NEWLOC parameter. */
2242 /* This used to be done in main(), but it's a better idea to do it
2243 here so that we don't go through the hoops if we're just using
2248 /* Warn on (likely bogus) wildcard usage in HTTP. */
2249 if (opt.ftp_glob && has_wildcards_p (u->path))
2250 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2252 /* Setup hstat struct. */
2254 hstat.referer = referer;
2256 if (opt.output_document)
2258 hstat.local_file = xstrdup (opt.output_document);
2262 /* Reset the counter. */
2265 /* Reset the document type. */
2271 /* Increment the pass counter. */
2273 sleep_between_retrievals (count);
2275 /* Get the current time string. */
2276 tms = time_str (time (NULL));
2278 if (opt.spider && !got_head)
2279 logprintf (LOG_VERBOSE, _("\
2280 Spider mode enabled. Check if remote file exists.\n"));
2282 /* Print fetch message, if opt.verbose. */
2285 char *hurl = url_string (u, true);
2290 sprintf (tmp, _("(try:%2d)"), count);
2291 logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
2296 logprintf (LOG_NOTQUIET, "--%s-- %s\n",
2301 ws_changetitle (hurl);
2306 /* Default document type is empty. However, if spider mode is
2307 on or time-stamping is employed, HEAD_ONLY commands is
2308 encoded within *dt. */
2309 if (((opt.spider || opt.timestamping) && !got_head)
2310 || (opt.always_rest && !got_name))
2315 /* Decide whether or not to restart. */
2318 && stat (hstat.local_file, &st) == 0
2319 && S_ISREG (st.st_mode))
2320 /* When -c is used, continue from on-disk size. (Can't use
2321 hstat.len even if count>1 because we don't want a failed
2322 first attempt to clobber existing data.) */
2323 hstat.restval = st.st_size;
2325 /* otherwise, continue where the previous try left off */
2326 hstat.restval = hstat.len;
2330 /* Decide whether to send the no-cache directive. We send it in
2332 a) we're using a proxy, and we're past our first retrieval.
2333 Some proxies are notorious for caching incomplete data, so
2334 we require a fresh get.
2335 b) caching is explicitly inhibited. */
2336 if ((proxy && count > 1) /* a */
2337 || !opt.allow_cache) /* b */
2338 *dt |= SEND_NOCACHE;
2340 *dt &= ~SEND_NOCACHE;
2342 /* Try fetching the document, or at least its head. */
2343 err = gethttp (u, &hstat, dt, proxy);
2346 tms = time_str (time (NULL));
2348 /* Get the new location (with or without the redirection). */
2350 *newloc = xstrdup (hstat.newloc);
2354 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2355 case CONERROR: case READERR: case WRITEFAILED:
2356 case RANGEERR: case FOPEN_EXCL_ERR:
2357 /* Non-fatal errors continue executing the loop, which will
2358 bring them to "while" statement at the end, to judge
2359 whether the number of tries was exceeded. */
2360 printwhat (count, opt.ntry);
2362 case FWRITEERR: case FOPENERR:
2363 /* Another fatal error. */
2364 logputs (LOG_VERBOSE, "\n");
2365 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2366 hstat.local_file, strerror (errno));
2367 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2368 case SSLINITFAILED: case CONTNOTSUPPORTED:
2369 /* Fatal errors just return from the function. */
2373 /* Another fatal error. */
2374 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2378 /* Return the new location to the caller. */
2381 logprintf (LOG_NOTQUIET,
2382 _("ERROR: Redirection (%d) without location.\n"),
2392 /* The file was already fully retrieved. */
2396 /* Deal with you later. */
2399 /* All possibilities should have been exhausted. */
2403 if (!(*dt & RETROKF))
2408 /* #### Ugly ugly ugly! */
2409 hurl = url_string (u, true);
2410 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2412 /* Maybe we should always keep track of broken links, not just in
2416 /* #### Again: ugly ugly ugly! */
2418 hurl = url_string (u, true);
2419 nonexisting_url (hurl);
2420 logprintf (LOG_NOTQUIET, _("\
2421 Remote file does not exist -- broken link!!!\n"));
2425 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2426 tms, hstat.statcode, escnonprint (hstat.error));
2428 logputs (LOG_VERBOSE, "\n");
2434 /* Did we get the time-stamp? */
2437 bool restart_loop = false;
2439 if (opt.timestamping && !hstat.remote_time)
2441 logputs (LOG_NOTQUIET, _("\
2442 Last-modified header missing -- time-stamps turned off.\n"));
2444 else if (hstat.remote_time)
2446 /* Convert the date-string into struct tm. */
2447 tmr = http_atotm (hstat.remote_time);
2448 if (tmr == (time_t) (-1))
2449 logputs (LOG_VERBOSE, _("\
2450 Last-modified header invalid -- time-stamp ignored.\n"));
2453 /* The time-stamping section. */
2454 if (opt.timestamping)
2456 if (hstat.orig_file_name) /* Perform the following checks only
2457 if the file we're supposed to
2458 download already exists. */
2460 if (hstat.remote_time &&
2461 tmr != (time_t) (-1))
2463 /* Now time-stamping can be used validly. Time-stamping
2464 means that if the sizes of the local and remote file
2465 match, and local file is newer than the remote file,
2466 it will not be retrieved. Otherwise, the normal
2467 download procedure is resumed. */
2468 if (hstat.orig_file_tstamp >= tmr)
2470 if (hstat.contlen == -1
2471 || hstat.orig_file_size == hstat.contlen)
2473 logprintf (LOG_VERBOSE, _("\
2474 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2475 hstat.orig_file_name);
2481 logprintf (LOG_VERBOSE, _("\
2482 The sizes do not match (local %s) -- retrieving.\n"),
2483 number_to_static_string (local_size));
2487 logputs (LOG_VERBOSE,
2488 _("Remote file is newer, retrieving.\n"));
2490 logputs (LOG_VERBOSE, "\n");
2494 /* free_hstat (&hstat); */
2495 hstat.timestamp_checked = true;
2496 restart_loop = true;
2499 if (opt.always_rest)
2502 restart_loop = true;
2511 logputs (LOG_VERBOSE, _("\
2512 Remote file exists and could contain links to other resources -- retrieving.\n\n"));
2513 restart_loop = true;
2517 logprintf (LOG_VERBOSE, _("\
2518 Remote file exists but does not contain any link -- not retrieving.\n\n"));
2525 logprintf (LOG_VERBOSE, _("\
2526 Remote file exists but recursion is disabled -- not retrieving.\n\n"));
2532 got_head = true; /* no more time-stamping */
2534 count = 0; /* the retrieve count for HEAD is reset */
2540 if ((tmr != (time_t) (-1))
2541 && ((hstat.len == hstat.contlen) ||
2542 ((hstat.res == 0) && (hstat.contlen == -1))))
2544 /* #### This code repeats in http.c and ftp.c. Move it to a
2546 const char *fl = NULL;
2547 if (opt.output_document)
2549 if (output_stream_regular)
2550 fl = opt.output_document;
2553 fl = hstat.local_file;
2557 /* End of time-stamping section. */
2559 tmrate = retr_rate (hstat.rd_size, hstat.dltime);
2560 total_download_time += hstat.dltime;
2562 if (hstat.len == hstat.contlen)
2566 logprintf (LOG_VERBOSE,
2567 _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2568 tms, tmrate, hstat.local_file,
2569 number_to_static_string (hstat.len),
2570 number_to_static_string (hstat.contlen));
2571 logprintf (LOG_NONVERBOSE,
2572 "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2574 number_to_static_string (hstat.len),
2575 number_to_static_string (hstat.contlen),
2576 hstat.local_file, count);
2579 total_downloaded_bytes += hstat.len;
2581 /* Remember that we downloaded the file for later ".orig" code. */
2582 if (*dt & ADDED_HTML_EXTENSION)
2583 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2585 downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2590 else if (hstat.res == 0) /* No read error */
2592 if (hstat.contlen == -1) /* We don't know how much we were supposed
2593 to get, so assume we succeeded. */
2597 logprintf (LOG_VERBOSE,
2598 _("%s (%s) - `%s' saved [%s]\n\n"),
2599 tms, tmrate, hstat.local_file,
2600 number_to_static_string (hstat.len));
2601 logprintf (LOG_NONVERBOSE,
2602 "%s URL:%s [%s] -> \"%s\" [%d]\n",
2603 tms, u->url, number_to_static_string (hstat.len),
2604 hstat.local_file, count);
2607 total_downloaded_bytes += hstat.len;
2609 /* Remember that we downloaded the file for later ".orig" code. */
2610 if (*dt & ADDED_HTML_EXTENSION)
2611 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2613 downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2618 else if (hstat.len < hstat.contlen) /* meaning we lost the
2619 connection too soon */
2621 logprintf (LOG_VERBOSE,
2622 _("%s (%s) - Connection closed at byte %s. "),
2623 tms, tmrate, number_to_static_string (hstat.len));
2624 printwhat (count, opt.ntry);
2628 /* Getting here would mean reading more data than
2629 requested with content-length, which we never do. */
2632 else /* from now on hstat.res can only be -1 */
2634 if (hstat.contlen == -1)
2636 logprintf (LOG_VERBOSE,
2637 _("%s (%s) - Read error at byte %s (%s)."),
2638 tms, tmrate, number_to_static_string (hstat.len),
2640 printwhat (count, opt.ntry);
2643 else /* hstat.res == -1 and contlen is given */
2645 logprintf (LOG_VERBOSE,
2646 _("%s (%s) - Read error at byte %s/%s (%s). "),
2648 number_to_static_string (hstat.len),
2649 number_to_static_string (hstat.contlen),
2651 printwhat (count, opt.ntry);
2657 while (!opt.ntry || (count < opt.ntry));
2661 *local_file = xstrdup (hstat.local_file);
2662 free_hstat (&hstat);
/* Decide whether a strptime() call consumed everything that matters.

   P is the value returned by strptime(): a pointer to the first
   character it did not parse, or NULL when parsing failed.  After
   skipping leading whitespace, the remainder must be empty, begin
   with `GMT', or begin with a sign followed by a digit.  In extended
   regexp parlance, P must match "^ *(GMT|[+-][0-9]|$)".

   Returns true when P matches, false otherwise.  A NULL P is treated
   as a failure.  */

static bool
check_end (const char *p)
{
  const char *rest = p;

  if (rest == NULL)
    return false;

  for (; ISSPACE (*rest); rest++)
    continue;

  if (*rest == '\0')
    return true;
  if (rest[0] == 'G' && rest[1] == 'M' && rest[2] == 'T')
    return true;
  return (rest[0] == '+' || rest[0] == '-') && ISDIGIT (rest[1]);
}
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2616 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
   as well as the time format used in the Set-Cookie header.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as I would like it to be.  Being based on
   strptime, it always allows shortened months, one-digit days, etc.,
   but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   I've investigated free and PD code for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
   distributed with phttpd, is excellent, but we cannot use it because
   it is not assigned to the FSF.  So I stuck with strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  const char *oldlocale;
  char *savedlocale = NULL;
  int i;
  time_t ret = (time_t) -1;

  /* Solaris strptime fails to recognize English month names in
     non-English locales, which we work around by temporarily setting
     locale to C before invoking strptime.

     The string returned by setlocale may be overwritten by a later
     setlocale call, so take a private copy before switching; the
     original pointer cannot be trusted afterwards.  */
  oldlocale = setlocale (LC_TIME, NULL);
  if (oldlocale)
    savedlocale = xstrdup (oldlocale);
  setlocale (LC_TIME, "C");

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* Some versions of strptime use the existing contents of struct
         tm to recalculate the date according to format.  Zero it out
         to prevent stack garbage from influencing strptime.  */
      xzero (t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        {
          ret = timegm (&t);
          break;
        }
    }

  /* Restore the previous locale from our copy, then release the copy.  */
  if (savedlocale)
    setlocale (LC_TIME, savedlocale);
  xfree_null (savedlocale);

  return ret;
}
/* Authorization support: We support three authorization schemes:

   * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
   consisting of answering the server's challenge with the proper
   MD5 digests;

   * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
   Stenberg for libcurl.  Like digest, NTLM is based on a
   challenge-response mechanism, but unlike digest, it is non-standard
   (authenticates TCP connections rather than requests), undocumented
   and Microsoft-specific.  */
/* Build the header value for the `Basic' authentication scheme.

   The credentials are joined as "USER:PASSWD", that string is passed
   through base64_encode, and the result is appended to the literal
   "Basic ".  Both scratch buffers live on the stack (alloca); the
   returned string is whatever concat_strings produces.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD", not counting the terminating NUL.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  char *plain = (char *) alloca (plain_len + 1);
  char *encoded = (char *) alloca (BASE64_LENGTH (plain_len) + 1);

  sprintf (plain, "%s:%s", user, passwd);
  base64_encode (plain, plain_len, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
/* Advance X past any whitespace it currently points at.  X must be a
   modifiable pointer lvalue; it is evaluated repeatedly.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    continue;                                   \
} while (0)
2804 #ifdef ENABLE_DIGEST
2805 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2806 an array of 16 bytes containing the hash keys, and BUF should be a
2807 buffer of 33 writable characters (32 for hex digits plus one for
2808 zero termination). */
2810 dump_hash (char *buf, const unsigned char *hash)
2814 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2816 *buf++ = XNUM_TO_digit (*hash >> 4);
2817 *buf++ = XNUM_TO_digit (*hash & 0xf);
2822 /* Take the line apart to find the challenge, and compose a digest
2823 authorization header. See RFC2069 section 2.1.2. */
2825 digest_authentication_encode (const char *au, const char *user,
2826 const char *passwd, const char *method,
2829 static char *realm, *opaque, *nonce;
2834 { "realm", &realm },
2835 { "opaque", &opaque },
2839 param_token name, value;
2841 realm = opaque = nonce = NULL;
2843 au += 6; /* skip over `Digest' */
2844 while (extract_param (&au, &name, &value, ','))
2847 for (i = 0; i < countof (options); i++)
2848 if (name.e - name.b == strlen (options[i].name)
2849 && 0 == strncmp (name.b, options[i].name, name.e - name.b))
2851 *options[i].variable = strdupdelim (value.b, value.e);
2855 if (!realm || !nonce || !user || !passwd || !path || !method)
2858 xfree_null (opaque);
2863 /* Calculate the digest value. */
2865 ALLOCA_MD5_CONTEXT (ctx);
2866 unsigned char hash[MD5_HASHLEN];
2867 char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2868 char response_digest[MD5_HASHLEN * 2 + 1];
2870 /* A1BUF = H(user ":" realm ":" password) */
2872 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2873 gen_md5_update ((unsigned char *)":", 1, ctx);
2874 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2875 gen_md5_update ((unsigned char *)":", 1, ctx);
2876 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2877 gen_md5_finish (ctx, hash);
2878 dump_hash (a1buf, hash);
2880 /* A2BUF = H(method ":" path) */
2882 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2883 gen_md5_update ((unsigned char *)":", 1, ctx);
2884 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2885 gen_md5_finish (ctx, hash);
2886 dump_hash (a2buf, hash);
2888 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2890 gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
2891 gen_md5_update ((unsigned char *)":", 1, ctx);
2892 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2893 gen_md5_update ((unsigned char *)":", 1, ctx);
2894 gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
2895 gen_md5_finish (ctx, hash);
2896 dump_hash (response_digest, hash);
2898 res = xmalloc (strlen (user)
2903 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2904 + (opaque ? strlen (opaque) : 0)
2906 sprintf (res, "Digest \
2907 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2908 user, realm, nonce, path, response_digest);
2911 char *p = res + strlen (res);
2912 strcat (p, ", opaque=\"");
2919 #endif /* ENABLE_DIGEST */
/* Computing the size of a string literal must take into account that
   value returned by sizeof includes the terminating \0.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* Whether chars in [b, e) begin with the literal string provided as
   first argument, and that prefix is followed either by whitespace or
   by the end of the range.  The comparison is case-insensitive.

   B and E may be arbitrary pointer expressions (every use is
   parenthesized), but each is evaluated more than once, so they must
   not have side effects.  */
#define STARTS(literal, b, e)                                   \
  ((e) - (b) >= STRSIZE (literal)                               \
   && 0 == strncasecmp ((b), (literal), STRSIZE (literal))      \
   && ((e) - (b) == STRSIZE (literal)                           \
       || ISSPACE ((b)[STRSIZE (literal)])))
/* Report whether the header value in [HDRBEG, HDREND) names an
   authentication scheme this build can answer: `Basic' always,
   `Digest' when ENABLE_DIGEST is defined, and `NTLM' when ENABLE_NTLM
   is defined.  Matching is done with STARTS.  */
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return true;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return true;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return true;
#endif
  return false;
}
/* Create the HTTP authorization request header.  AU is the value of
   a `WWW-Authenticate' response header; depending on the scheme it
   names (`Basic', plus `Digest' and `NTLM' when compiled in), the
   matching encoder is called with USER, PASSWD, METHOD and PATH.

   *FINISHED is set to true for Basic and Digest, and when the NTLM
   input is rejected (in which case NULL is returned); otherwise it
   is left for ntlm_output to fill in.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, bool *finished)
{
  /* Only schemes accepted by known_authentication_scheme_p reach
     this point, so the first letter is enough to tell them apart.  */
  const int scheme = TOUPPER (*au);

  if (scheme == 'B')            /* Basic */
    {
      *finished = true;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (scheme == 'D')            /* Digest */
    {
      *finished = true;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (scheme == 'N')            /* NTLM */
    {
      if (ntlm_input (&pconn.ntlm, au))
        return ntlm_output (&pconn.ntlm, user, passwd, finished);
      *finished = true;
      return NULL;
    }
#endif

  /* We shouldn't get here -- this function should be only called
     with values approved by known_authentication_scheme_p.  */
  abort ();
}
2990 if (!wget_cookie_jar)
2991 wget_cookie_jar = cookie_jar_new ();
2992 if (opt.cookies_input && !cookies_loaded_p)
2994 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
2995 cookies_loaded_p = true;
3002 if (wget_cookie_jar)
3003 cookie_jar_save (wget_cookie_jar, opt.cookies_output);
3009 xfree_null (pconn.host);
3010 if (wget_cookie_jar)
3011 cookie_jar_delete (wget_cookie_jar);
3015 ensure_extension (struct http_stat *hs, const char *ext, int *dt)
3017 char *last_period_in_local_filename = strrchr (hs->local_file, '.');
3019 int len = strlen (ext);
3022 strncpy (shortext, ext, len - 1);
3023 shortext[len - 2] = '\0';
3026 if (last_period_in_local_filename == NULL
3027 || !(0 == strcasecmp (last_period_in_local_filename, shortext)
3028 || 0 == strcasecmp (last_period_in_local_filename, ext)))
3030 int local_filename_len = strlen (hs->local_file);
3031 /* Resize the local file, allowing for ".html" preceded by
3032 optional ".NUMBER". */
3033 hs->local_file = xrealloc (hs->local_file,
3034 local_filename_len + 24 + len);
3035 strcpy (hs->local_file + local_filename_len, ext);
3036 /* If clobbering is not allowed and the file, as named,
3037 exists, tack on ".NUMBER.html" instead. */
3038 if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
3042 sprintf (hs->local_file + local_filename_len,
3043 ".%d%s", ext_num++, ext);
3044 while (file_exists_p (hs->local_file));
3046 *dt |= ADDED_HTML_EXTENSION;
#ifdef TESTING

/* Unit test for parse_content_disposition.  Each table entry gives a
   header value, the file name expected from it, and whether parsing
   is expected to succeed.  Returns NULL when every entry behaves as
   expected; otherwise mu_assert returns its message string.  */
const char *
test_parse_content_disposition()
{
  struct {
    char *hdrval;
    char *filename;
    bool result;
  } test_array[] = {
    { "filename=\"file.ext\"", "file.ext", true },
    { "attachment; filename=\"file.ext\"", "file.ext", true },
    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
    { "attachment", NULL, false },
  };
  int idx;

  for (idx = 0; idx < sizeof (test_array) / sizeof (test_array[0]); idx++)
    {
      char *filename;
      bool parsed = parse_content_disposition (test_array[idx].hdrval,
                                               &filename);

      /* The file name is compared only after the success flag has
         matched and parsing succeeded, so the NULL expectation of a
         failing entry is never handed to strcmp.  */
      mu_assert ("test_parse_content_disposition: wrong result",
                 parsed == test_array[idx].result
                 && (!parsed
                     || 0 == strcmp (test_array[idx].filename, filename)));
    }

  return NULL;
}

#endif /* TESTING */