2 Copyright (C) 1996-2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
55 # include "http-ntlm.h"
68 extern char *version_string;
/* Yield the smaller of X and Y.  Each argument appears twice in the
   expansion, so do not pass expressions with side effects.  */
71 # define MIN(x, y) ((x) > (y) ? (y) : (x))
75 static bool cookies_loaded_p;
76 static struct cookie_jar *wget_cookie_jar;
/* Content type strings for HTML and XHTML documents.  */
78 #define TEXTHTML_S "text/html"
79 #define TEXTXHTML_S "application/xhtml+xml"
81 /* Some status code validation macros.  Each takes an HTTP status
   code X and yields nonzero when it matches:

     H_20X        - any code in the range [200, 300)
     H_PARTIAL    - HTTP_STATUS_PARTIAL_CONTENTS (206)
     H_REDIRECTED - one of the redirect codes 301, 302, 303 or 307

   X appears more than once in the expansions, so it should not have
   side effects.  */
82 #define H_20X(x) (((x) >= 200) && ((x) < 300))
83 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
84 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
85 || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
86 || (x) == HTTP_STATUS_SEE_OTHER \
87 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
89 /* HTTP/1.0 status codes from RFC1945, provided for reference.
   Codes marked "from HTTP/1.1" below are not part of HTTP/1.0.  */
/* Successful 2xx. */
91 #define HTTP_STATUS_OK 200
92 #define HTTP_STATUS_CREATED 201
93 #define HTTP_STATUS_ACCEPTED 202
94 #define HTTP_STATUS_NO_CONTENT 204
95 #define HTTP_STATUS_PARTIAL_CONTENTS 206
97 /* Redirection 3xx. */
98 #define HTTP_STATUS_MULTIPLE_CHOICES 300
99 #define HTTP_STATUS_MOVED_PERMANENTLY 301
100 #define HTTP_STATUS_MOVED_TEMPORARILY 302
101 #define HTTP_STATUS_SEE_OTHER 303 /* from HTTP/1.1 */
102 #define HTTP_STATUS_NOT_MODIFIED 304
103 #define HTTP_STATUS_TEMPORARY_REDIRECT 307 /* from HTTP/1.1 */
105 /* Client error 4xx. */
106 #define HTTP_STATUS_BAD_REQUEST 400
107 #define HTTP_STATUS_UNAUTHORIZED 401
108 #define HTTP_STATUS_FORBIDDEN 403
109 #define HTTP_STATUS_NOT_FOUND 404
110 #define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
112 /* Server errors 5xx. */
113 #define HTTP_STATUS_INTERNAL 500
114 #define HTTP_STATUS_NOT_IMPLEMENTED 501
115 #define HTTP_STATUS_BAD_GATEWAY 502
116 #define HTTP_STATUS_UNAVAILABLE 503
119 rel_none, rel_name, rel_value, rel_both
126 struct request_header {
128 enum rp release_policy;
130 int hcount, hcapacity;
133 /* Create a new, empty request. At least request_set_method must be
134 called before the request can be used. */
136 static struct request *
139 struct request *req = xnew0 (struct request);
141 req->headers = xnew_array (struct request_header, req->hcapacity);
145 /* Set the request's method and its arguments. METH should be a
146 literal string (or it should outlive the request) because it will
147 not be freed. ARG will be freed by request_free. */
150 request_set_method (struct request *req, const char *meth, char *arg)
156 /* Return the method string passed with the last call to
157 request_set_method. */
160 request_method (const struct request *req)
165 /* Free one header according to the release policy specified with
166 request_set_header. */
169 release_header (struct request_header *hdr)
171 switch (hdr->release_policy)
188 /* Set the request header named NAME to VALUE. Specifically, this means that
189 a "NAME: VALUE\r\n" header line will be used in the request. If a
190 header with the same name previously existed in the request, its
191 value will be replaced by this one. A NULL value means do nothing.
193 RELEASE_POLICY determines whether NAME and VALUE should be released
194 (freed) with request_free. Allowed values are:
196 - rel_none - don't free NAME or VALUE
197 - rel_name - free NAME when done
198 - rel_value - free VALUE when done
199 - rel_both - free both NAME and VALUE when done
201 Setting release policy is useful when arguments come from different
202 sources. For example:
204 // Don't free literal strings!
205 request_set_header (req, "Pragma", "no-cache", rel_none);
207 // Don't free a global variable, we'll need it later.
208 request_set_header (req, "Referer", opt.referer, rel_none);
210 // Value freshly allocated, free it when done.
211 request_set_header (req, "Range",
212 aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
217 request_set_header (struct request *req, char *name, char *value,
218 enum rp release_policy)
220 struct request_header *hdr;
225 /* A NULL value is a no-op; if freeing the name is requested,
226 free it now to avoid leaks. */
227 if (release_policy == rel_name || release_policy == rel_both)
232 for (i = 0; i < req->hcount; i++)
234 hdr = &req->headers[i];
235 if (0 == strcasecmp (name, hdr->name))
237 /* Replace existing header. */
238 release_header (hdr);
241 hdr->release_policy = release_policy;
246 /* Install new header. */
248 if (req->hcount >= req->hcapacity)
250 req->hcapacity <<= 1;
251 req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
253 hdr = &req->headers[req->hcount++];
256 hdr->release_policy = release_policy;
259 /* Like request_set_header, but sets the whole header line, as
260 provided by the user using the `--header' option. For example,
261 request_set_user_header (req, "Foo: bar") sets the same header line as
262 request_set_header (req, "Foo", "bar", rel_none). */
265 request_set_user_header (struct request *req, const char *header)
268 const char *p = strchr (header, ':');
271 BOUNDED_TO_ALLOCA (header, p, name);
275 request_set_header (req, xstrdup (name), (char *) p, rel_name);
278 /* Remove the header with specified name from REQ. Returns true if
279 the header was actually removed, false otherwise. */
282 request_remove_header (struct request *req, char *name)
285 for (i = 0; i < req->hcount; i++)
287 struct request_header *hdr = &req->headers[i];
288 if (0 == strcasecmp (name, hdr->name))
290 release_header (hdr);
291 /* Move the remaining headers by one. */
292 if (i < req->hcount - 1)
293 memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
301 #define APPEND(p, str) do { \
302 int A_len = strlen (str); \
303 memcpy (p, str, A_len); \
307 /* Construct the request and write it to FD using fd_write. */
310 request_send (const struct request *req, int fd)
312 char *request_string, *p;
313 int i, size, write_error;
315 /* Count the request size. */
318 /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
319 size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
321 for (i = 0; i < req->hcount; i++)
323 struct request_header *hdr = &req->headers[i];
324 /* NAME ": " VALUE "\r\n" */
325 size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
331 p = request_string = alloca_array (char, size);
333 /* Generate the request. */
335 APPEND (p, req->method); *p++ = ' ';
336 APPEND (p, req->arg); *p++ = ' ';
337 memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
339 for (i = 0; i < req->hcount; i++)
341 struct request_header *hdr = &req->headers[i];
342 APPEND (p, hdr->name);
343 *p++ = ':', *p++ = ' ';
344 APPEND (p, hdr->value);
345 *p++ = '\r', *p++ = '\n';
348 *p++ = '\r', *p++ = '\n', *p++ = '\0';
349 assert (p - request_string == size);
353 DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
355 /* Send the request to the server. */
357 write_error = fd_write (fd, request_string, size - 1, -1);
359 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
364 /* Release the resources used by REQ. */
367 request_free (struct request *req)
370 xfree_null (req->arg);
371 for (i = 0; i < req->hcount; i++)
372 release_header (&req->headers[i]);
373 xfree_null (req->headers);
377 /* Send the contents of FILE_NAME to SOCK. Make sure that exactly
378 PROMISED_SIZE bytes are sent over the wire -- if the file is
379 longer, read only that much; if the file is shorter, report an error. */
382 post_file (int sock, const char *file_name, wgint promised_size)
384 static char chunk[8192];
389 DEBUGP (("[writing POST file %s ... ", file_name));
391 fp = fopen (file_name, "rb");
394 while (!feof (fp) && written < promised_size)
397 int length = fread (chunk, 1, sizeof (chunk), fp);
400 towrite = MIN (promised_size - written, length);
401 write_error = fd_write (sock, chunk, towrite, -1);
411 /* If we've written less than was promised, report a (probably
412 nonsensical) error rather than break the promise. */
413 if (written < promised_size)
419 assert (written == promised_size);
420 DEBUGP (("done]\n"));
424 /* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
425 If so, return the pointer to the position after the line, otherwise
426 return NULL. This is used as callback to fd_read_hunk. The data
427 between START and PEEKED has been read and cannot be "unread"; the
428 data after PEEKED has only been peeked. */
431 response_head_terminator (const char *start, const char *peeked, int peeklen)
435 /* If at first peek, verify whether HUNK starts with "HTTP". If
436 not, this is an HTTP/0.9 response and we must bail out without
438 if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
441 /* Look for "\n[\r]\n", and return the following position if found.
442 Start two chars before the current to cover the possibility that
443 part of the terminator (e.g. "\n\r") arrived in the previous
445 p = peeked - start < 2 ? start : peeked - 2;
446 end = peeked + peeklen;
448 /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
449 for (; p < end - 2; p++)
452 if (p[1] == '\r' && p[2] == '\n')
454 else if (p[1] == '\n')
457 /* p==end-2: check for \n\n directly preceding END. */
458 if (p[0] == '\n' && p[1] == '\n')
464 /* The maximum size of a single HTTP response we care to read. Rather
465 than being a limit of the reader implementation, this limit
466 prevents Wget from slurping all available memory upon encountering
467 malicious or buggy server output, thus protecting the user. Define
468 it to 0 to remove the limit. */
470 #define HTTP_RESPONSE_MAX_SIZE 65536
472 /* Read the HTTP response head from FD and return it. The error
473 conditions are the same as with fd_read_hunk.
475 To support HTTP/0.9 responses, this function tries to make sure
476 that the data begins with "HTTP". If this is not the case, no data
477 is read and an empty head is returned, so that the remaining
478 data can be treated as body. */
481 read_http_response_head (int fd)
483 return fd_read_hunk (fd, response_head_terminator, 512,
484 HTTP_RESPONSE_MAX_SIZE);
488 /* The response data. */
491 /* The array of pointers that indicate where each header starts.
492 For example, given this HTTP response:
499 The headers are located like this:
501 "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
503 headers[0] headers[1] headers[2] headers[3]
505 I.e. headers[0] points to the beginning of the response,
506 headers[1] points to the end of the first header and the
507 beginning of the second one, etc. */
509 const char **headers;
512 /* Create a new response object from the text of the HTTP response,
513 available in HEAD. That text is automatically split into
514 constituent header lines for fast retrieval using
517 static struct response *
518 resp_new (const char *head)
523 struct response *resp = xnew0 (struct response);
528 /* Empty head means that we're dealing with a headerless
529 (HTTP/0.9) response. In that case, don't set HEADERS at
534 /* Split HEAD into header lines, so that resp_header_* functions
535 don't need to do this over and over again. */
541 DO_REALLOC (resp->headers, size, count + 1, const char *);
542 resp->headers[count++] = hdr;
544 /* Break upon encountering an empty line. */
545 if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
548 /* Find the end of HDR, including continuations. */
551 const char *end = strchr (hdr, '\n');
557 while (*hdr == ' ' || *hdr == '\t');
559 DO_REALLOC (resp->headers, size, count + 1, const char *);
560 resp->headers[count] = NULL;
565 /* Locate the header named NAME in the response data, starting with
566 position START. This allows the code to loop through the response
567 data, filtering for all headers of a given name. Returns the
568 found position, or -1 for failure. The code that uses this
569 function typically looks like this:
571 for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
572 ... do something with header ...
574 If you only care about one header, use resp_header_get instead of
578 resp_header_locate (const struct response *resp, const char *name, int start,
579 const char **begptr, const char **endptr)
582 const char **headers = resp->headers;
585 if (!headers || !headers[1])
588 name_len = strlen (name);
594 for (; headers[i + 1]; i++)
596 const char *b = headers[i];
597 const char *e = headers[i + 1];
599 && b[name_len] == ':'
600 && 0 == strncasecmp (b, name, name_len))
603 while (b < e && ISSPACE (*b))
605 while (b < e && ISSPACE (e[-1]))
615 /* Find and retrieve the header named NAME in the response data. If
616 found, set *BEGPTR to its starting, and *ENDPTR to its ending
617 position, and return true. Otherwise return false.
619 This function is used as a building block for resp_header_copy
620 and resp_header_strdup. */
623 resp_header_get (const struct response *resp, const char *name,
624 const char **begptr, const char **endptr)
626 int pos = resp_header_locate (resp, name, 0, begptr, endptr);
630 /* Copy the response header named NAME to buffer BUF, no longer than
631 BUFSIZE (BUFSIZE includes the terminating 0). If the header
632 exists, true is returned, false otherwise. If there should be no
633 limit on the size of the header, use resp_header_strdup instead.
635 If BUFSIZE is 0, no data is copied, but the boolean indication of
636 whether the header is present is still returned. */
639 resp_header_copy (const struct response *resp, const char *name,
640 char *buf, int bufsize)
643 if (!resp_header_get (resp, name, &b, &e))
647 int len = MIN (e - b, bufsize - 1);
648 memcpy (buf, b, len);
654 /* Return the value of header named NAME in RESP, allocated with
655 malloc. If such a header does not exist in RESP, return NULL. */
658 resp_header_strdup (const struct response *resp, const char *name)
661 if (!resp_header_get (resp, name, &b, &e))
663 return strdupdelim (b, e);
666 /* Parse the HTTP status line, which is of format:
668 HTTP-Version SP Status-Code SP Reason-Phrase
670 The function returns the status-code, or -1 if the status line
671 appears malformed. The pointer to "reason-phrase" message is
672 returned in *MESSAGE. */
675 resp_status (const struct response *resp, char **message)
682 /* For a HTTP/0.9 response, assume status 200. */
684 *message = xstrdup (_("No headers, assuming HTTP/0.9"));
688 p = resp->headers[0];
689 end = resp->headers[1];
695 if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
699 /* Match the HTTP version. This is optional because Gnutella
700 servers have been reported to not specify HTTP version. */
701 if (p < end && *p == '/')
704 while (p < end && ISDIGIT (*p))
706 if (p < end && *p == '.')
708 while (p < end && ISDIGIT (*p))
712 while (p < end && ISSPACE (*p))
714 if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
717 status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
722 while (p < end && ISSPACE (*p))
724 while (p < end && ISSPACE (end[-1]))
726 *message = strdupdelim (p, end);
732 /* Release the resources used by RESP. */
735 resp_free (struct response *resp)
737 xfree_null (resp->headers);
741 /* Print the server response, line by line, omitting the trailing CRLF
742 from individual header lines, and prefixed with PREFIX. */
745 print_server_response (const struct response *resp, const char *prefix)
750 for (i = 0; resp->headers[i + 1]; i++)
752 const char *b = resp->headers[i];
753 const char *e = resp->headers[i + 1];
755 if (b < e && e[-1] == '\n')
757 if (b < e && e[-1] == '\r')
759 /* This is safe even on printfs with broken handling of "%.<n>s"
760 because resp->headers ends with \0. */
761 logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
765 /* Parse the `Content-Range' header and extract the information it
766 contains. Returns true if successful, false otherwise. */
768 parse_content_range (const char *hdr, wgint *first_byte_ptr,
769 wgint *last_byte_ptr, wgint *entity_length_ptr)
773 /* Ancient versions of Netscape proxy server, presumably predating
774 rfc2068, sent out `Content-Range' without the "bytes"
776 if (0 == strncasecmp (hdr, "bytes", 5))
779 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
783 while (ISSPACE (*hdr))
790 for (num = 0; ISDIGIT (*hdr); hdr++)
791 num = 10 * num + (*hdr - '0');
792 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
794 *first_byte_ptr = num;
796 for (num = 0; ISDIGIT (*hdr); hdr++)
797 num = 10 * num + (*hdr - '0');
798 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
800 *last_byte_ptr = num;
802 for (num = 0; ISDIGIT (*hdr); hdr++)
803 num = 10 * num + (*hdr - '0');
804 *entity_length_ptr = num;
808 /* Read the body of the response, but don't store it anywhere and don't
809 display a progress gauge. This is useful for reading the bodies of
810 administrative responses to which we will soon issue another
811 request. The response is not useful to the user, but reading it
812 allows us to continue using the same connection to the server.
814 If reading fails, false is returned, true otherwise. In debug
815 mode, the body is displayed for debugging purposes. */
818 skip_short_body (int fd, wgint contlen)
821 SKIP_SIZE = 512, /* size of the download buffer */
822 SKIP_THRESHOLD = 4096 /* the largest size we read */
824 char dlbuf[SKIP_SIZE + 1];
825 dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
827 /* We shouldn't get here with unknown contlen. (This will change
828 with HTTP/1.1, which supports "chunked" transfer.) */
829 assert (contlen != -1);
831 /* If the body is too large, it makes more sense to simply close the
832 connection than to try to read the body. */
833 if (contlen > SKIP_THRESHOLD)
836 DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
840 int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
843 /* Don't normally report the error since this is an
844 optimization that should be invisible to the user. */
845 DEBUGP (("] aborting (%s).\n",
846 ret < 0 ? fd_errstr (fd) : "EOF received"));
850 /* Safe even if %.*s bogusly expects terminating \0 because
851 we've zero-terminated dlbuf above. */
852 DEBUGP (("%.*s", ret, dlbuf));
855 DEBUGP (("] done.\n"));
859 /* Extract a parameter from the string (typically an HTTP header) at
860 **SOURCE and advance SOURCE to the next parameter. Return false
861 when there are no more parameters to extract. The name of the
862 parameter is returned in NAME, and the value in VALUE. If the
863 parameter has no value, the token's value is zeroed out.
865 For example, if *SOURCE points to the string "attachment;
866 filename=\"foo bar\"", the first call to this function will return
867 the token named "attachment" and no value, and the second call will
868 return the token named "filename" and value "foo bar". The third
869 call will return false, indicating no more valid tokens. */
872 extract_param (const char **source, param_token *name, param_token *value,
875 const char *p = *source;
877 while (ISSPACE (*p)) ++p;
881 return false; /* no error; nothing more to extract */
886 while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
888 if (name->b == name->e)
889 return false; /* empty name: error */
890 while (ISSPACE (*p)) ++p;
891 if (*p == separator || !*p) /* no value */
894 if (*p == separator) ++p;
899 return false; /* error */
901 /* *p is '=', extract value */
903 while (ISSPACE (*p)) ++p;
904 if (*p == '"') /* quoted */
907 while (*p && *p != '"') ++p;
911 /* Currently at closing quote; find the end of param. */
912 while (ISSPACE (*p)) ++p;
913 while (*p && *p != separator) ++p;
917 /* garbage after closed quote, e.g. foo="bar"baz */
923 while (*p && *p != separator) ++p;
925 while (value->e != value->b && ISSPACE (value->e[-1]))
927 if (*p == separator) ++p;
/* Yield the larger of P and Q.  Each argument appears twice in the
   expansion, so avoid arguments with side effects.  */
934 #define MAX(p, q) ((p) > (q) ? (p) : (q))
936 /* Parse the contents of the `Content-Disposition' header, extracting
937 the information useful to Wget. Content-Disposition is a header
938 borrowed from MIME; when used in HTTP, it typically serves for
939 specifying the desired file name of the resource. For example:
941 Content-Disposition: attachment; filename="flora.jpg"
943 Wget will skip the tokens it doesn't care about, such as
944 "attachment" in the previous example; it will also skip other
945 unrecognized params. If the header is syntactically correct and
946 contains a file name, a copy of the file name is stored in
947 *filename and true is returned. Otherwise, the function returns
950 The file name is stripped of directory components and must not be
954 parse_content_disposition (const char *hdr, char **filename)
956 param_token name, value;
957 while (extract_param (&hdr, &name, &value, ';'))
958 if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
960 /* Make the file name begin at the last slash or backslash. */
961 const char *last_slash = memrchr (value.b, '/', value.e - value.b);
962 const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
963 if (last_slash && last_bs)
964 value.b = 1 + MAX (last_slash, last_bs);
965 else if (last_slash || last_bs)
966 value.b = 1 + (last_slash ? last_slash : last_bs);
967 if (value.b == value.e)
969 *filename = strdupdelim (value.b, value.e);
975 /* Persistent connections. Currently, we cache the most recently used
976 connection as persistent, provided that the HTTP server agrees to
977 make it such. The persistence data is stored in the variables
978 below. Ideally, it should be possible to cache an arbitrary fixed
979 number of these connections. */
981 /* Whether a persistent connection is active. */
982 static bool pconn_active;
985 /* The socket of the connection. */
988 /* Host and port of the currently active persistent connection. */
992 /* Whether an SSL handshake has occurred on this connection. */
995 /* Whether the connection was authorized. This is only done by
996 NTLM, which authorizes *connections* rather than individual
997 requests. (That practice is peculiar for HTTP, but it is a
998 useful optimization.) */
1002 /* NTLM data of the current connection. */
1003 struct ntlmdata ntlm;
1007 /* Mark the persistent connection as invalid and free the resources it
1008 uses. This is used by the CLOSE_* macros after they forcefully
1009 close a registered persistent connection. */
1012 invalidate_persistent (void)
1014 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
1015 pconn_active = false;
1016 fd_close (pconn.socket);
1021 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
1022 persistent. This will enable someone to use the same connection
1023 later. In the context of HTTP, this must be called only AFTER the
1024 response has been received and the server has promised that the
1025 connection will remain alive.
1027 If a previous connection was persistent, it is closed. */
1030 register_persistent (const char *host, int port, int fd, bool ssl)
1034 if (pconn.socket == fd)
1036 /* The connection FD is already registered. */
1041 /* The old persistent connection is still active; close it
1042 first. This situation arises whenever a persistent
1043 connection exists, but we then connect to a different
1044 host, and try to register a persistent connection to that
1046 invalidate_persistent ();
1050 pconn_active = true;
1052 pconn.host = xstrdup (host);
1055 pconn.authorized = false;
1057 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
1060 /* Return true if a persistent connection is available for connecting
1064 persistent_available_p (const char *host, int port, bool ssl,
1065 bool *host_lookup_failed)
1067 /* First, check whether a persistent connection is active at all. */
1071 /* If we want SSL and the last connection wasn't or vice versa,
1072 don't use it. Checking for host and port is not enough because
1073 HTTP and HTTPS can apparently coexist on the same port. */
1074 if (ssl != pconn.ssl)
1077 /* If we're not connecting to the same port, we're not interested. */
1078 if (port != pconn.port)
1081 /* If the host is the same, we're in business. If not, there is
1082 still hope -- read below. */
1083 if (0 != strcasecmp (host, pconn.host))
1085 /* Check if pconn.socket is talking to HOST under another name.
1086 This happens often when both sites are virtual hosts
1087 distinguished only by name and served by the same network
1088 interface, and hence the same web server (possibly set up by
1089 the ISP and serving many different web sites). This
1090 admittedly unconventional optimization does not contradict
1091 HTTP and works well with popular server software. */
1095 struct address_list *al;
1098 /* Don't try to talk to two different SSL sites over the same
1099 secure connection! (Besides, it's not clear that
1100 name-based virtual hosting is even possible with SSL.) */
1103 /* If pconn.socket's peer is one of the IP addresses HOST
1104 resolves to, pconn.socket is for all intents and purposes
1105 already talking to HOST. */
1107 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
1109 /* Can't get the peer's address -- something must be very
1110 wrong with the connection. */
1111 invalidate_persistent ();
1114 al = lookup_host (host, 0);
1117 *host_lookup_failed = true;
1121 found = address_list_contains (al, &ip);
1122 address_list_release (al);
1127 /* The persistent connection's peer address was found among the
1128 addresses HOST resolved to; therefore, pconn.sock is in fact
1129 already talking to HOST -- no need to reconnect. */
1132 /* Finally, check whether the connection is still open. This is
1133 important because most servers implement liberal (short) timeout
1134 on persistent connections. Wget can of course always reconnect
1135 if the connection doesn't work out, but it's nicer to know in
1136 advance. This test is a logical followup of the first test, but
1137 is "expensive" and therefore placed at the end of the list.
1139 (Current implementation of test_socket_open has a nice side
1140 effect that it treats sockets with pending data as "closed".
1141 This is exactly what we want: if a broken server sends message
1142 body in response to HEAD, or if it sends more than content-length
1143 data, we won't reuse the corrupted connection.) */
1145 if (!test_socket_open (pconn.socket))
1147 /* Oops, the socket is no longer open. Now that we know that,
1148 let's invalidate the persistent connection before returning
1150 invalidate_persistent ();
1157 /* The idea behind these two CLOSE macros is to distinguish between
1158 two cases: one when the job we've been doing is finished, and we
1159 want to close the connection and leave, and two when something is
1160 seriously wrong and we're closing the connection as part of
1163 In case of keep_alive, CLOSE_FINISH should leave the connection
1164 open, while CLOSE_INVALIDATE should still close it.
1166 Note that the semantics of the flag `keep_alive' is "this
1167 connection *will* be reused (the server has promised not to close
1168 the connection once we're done)", while the semantics of
1169 `pconn_active && (fd) == pconn.socket' is "we're *now* using an
1170 active, registered connection". */
1172 #define CLOSE_FINISH(fd) do { \
1175 if (pconn_active && (fd) == pconn.socket) \
1176 invalidate_persistent (); \
1185 #define CLOSE_INVALIDATE(fd) do { \
1186 if (pconn_active && (fd) == pconn.socket) \
1187 invalidate_persistent (); \
1195 wgint len; /* received length */
1196 wgint contlen; /* expected length */
1197 wgint restval; /* the restart value */
1198 int res; /* the result of last read */
1199 char *rderrmsg; /* error message from read error */
1200 char *newloc; /* new location (redirection) */
1201 char *remote_time; /* remote time-stamp string */
1202 char *error; /* textual HTTP error */
1203 int statcode; /* status code */
1204 wgint rd_size; /* amount of data read from socket */
1205 double dltime; /* time it took to download the data */
1206 const char *referer; /* value of the referer header. */
1207 char *local_file; /* local file name. */
1208 bool timestamp_checked; /* true if pre-download time-stamping checks
1209 * have already been performed */
1210 char *orig_file_name; /* name of file to compare for time-stamping
1211 * (might be != local_file if -K is set) */
1212 wgint orig_file_size; /* size of file to compare for time-stamping */
1213 time_t orig_file_tstamp; /* time-stamp of file to compare for
1218 free_hstat (struct http_stat *hs)
1220 xfree_null (hs->newloc);
1221 xfree_null (hs->remote_time);
1222 xfree_null (hs->error);
1223 xfree_null (hs->rderrmsg);
1224 xfree_null (hs->local_file);
1225 xfree_null (hs->orig_file_name);
1227 /* Guard against being called twice. */
1229 hs->remote_time = NULL;
1233 static char *create_authorization_line (const char *, const char *,
1234 const char *, const char *,
1235 const char *, bool *);
1236 static char *basic_authentication_encode (const char *, const char *);
1237 static bool known_authentication_scheme_p (const char *, const char *);
1238 static void load_cookies (void);
/* True when LINE starts with STRING_CONSTANT (compared without regard
   to case) and the match is immediately followed by whitespace or by
   the end of the string.  STRING_CONSTANT must be a string literal,
   because its length is computed with sizeof.  LINE is expanded
   several times, so it should not have side effects.  */
1240 #define BEGINS_WITH(line, string_constant) \
1241 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
1242 && (ISSPACE (line[sizeof (string_constant) - 1]) \
1243 || !line[sizeof (string_constant) - 1]))
1245 #define SET_USER_AGENT(req) do { \
1246 if (!opt.useragent) \
1247 request_set_header (req, "User-Agent", \
1248 aprintf ("Wget/%s", version_string), rel_value); \
1249 else if (*opt.useragent) \
1250 request_set_header (req, "User-Agent", opt.useragent, rel_none); \
1253 /* The flags that allow clobbering the file (opening with "wb").
1254 Defined here to avoid repetition later. #### This will require
1256 #define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
1257 || opt.dirstruct || opt.output_document)
1259 /* Retrieve a document through HTTP protocol. It recognizes status
1260 code, and correctly handles redirections. It closes the network
1261 socket. If it receives an error from the functions below it, it
1262 will print it if there is enough information to do so (almost
1263 always), returning the error to the caller (i.e. http_loop).
1265 Various HTTP parameters are stored to hs.
1267 If PROXY is non-NULL, the connection will be made to the proxy
1268 server, and u->url will be requested. */
1270 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1272 struct request *req;
1275 char *user, *passwd;
1279 wgint contlen, contrange;
1286 /* Set to true when the authorization has failed permanently and should
1287 not be tried again. */
1288 bool auth_finished = false;
1290 /* Whether NTLM authentication is used for this request. */
1291 bool ntlm_seen = false;
1293 /* Whether our connection to the remote host is through SSL. */
1294 bool using_ssl = false;
1296 /* Whether a HEAD request will be issued (as opposed to GET or
1298 bool head_only = !!(*dt & HEAD_ONLY);
1301 struct response *resp;
1305 /* Whether this connection will be kept alive after the HTTP request
1309 /* Whether keep-alive should be inhibited.
1311 RFC 2068 requests that 1.0 clients not send keep-alive requests
1312 to proxies. This is because many 1.0 proxies do not interpret
1313 the Connection header and transfer it to the remote server,
1314 causing it to not close the connection and leave both the proxy
1315 and the client hanging. */
1316 bool inhibit_keep_alive =
1317 !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1319 /* Headers sent when using POST. */
1320 wgint post_data_size = 0;
1322 bool host_lookup_failed = false;
1325 if (u->scheme == SCHEME_HTTPS)
1327 /* Initialize the SSL context. After this has once been done,
1328 it becomes a no-op. */
1331 scheme_disable (SCHEME_HTTPS);
1332 logprintf (LOG_NOTQUIET,
1333 _("Disabling SSL due to encountered errors.\n"));
1334 return SSLINITFAILED;
1337 #endif /* HAVE_SSL */
1339 /* Initialize certain elements of struct http_stat. */
1343 hs->rderrmsg = NULL;
1345 hs->remote_time = NULL;
1350 /* Prepare the request to send. */
1352 req = request_new ();
1355 const char *meth = "GET";
1358 else if (opt.post_file_name || opt.post_data)
1360 /* Use the full path, i.e. one that includes the leading slash and
1361 the query string. E.g. if u->path is "foo/bar" and u->query is
1362 "param=value", full_path will be "/foo/bar?param=value". */
1365 /* When using SSL over proxy, CONNECT establishes a direct
1366 connection to the HTTPS server. Therefore use the same
1367 argument as when talking to the server directly. */
1368 && u->scheme != SCHEME_HTTPS
1371 meth_arg = xstrdup (u->url);
1373 meth_arg = url_full_path (u);
1374 request_set_method (req, meth, meth_arg);
1377 request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1378 if (*dt & SEND_NOCACHE)
1379 request_set_header (req, "Pragma", "no-cache", rel_none);
1381 request_set_header (req, "Range",
1382 aprintf ("bytes=%s-",
1383 number_to_static_string (hs->restval)),
1385 SET_USER_AGENT (req);
1386 request_set_header (req, "Accept", "*/*", rel_none);
1388 /* Find the username and password for authentication. */
1391 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1392 user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1393 passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1397 /* We have the username and the password, but haven't tried
1398 any authorization yet. Let's see if the "Basic" method
1399 works. If not, we'll come back here and construct a
1400 proper authorization method with the right challenges.
1402 If we didn't employ this kind of logic, every URL that
1403 requires authorization would have to be processed twice,
1404 which is very suboptimal and generates a bunch of false
1405 "unauthorized" errors in the server log.
1407 #### But this logic also has a serious problem when used
1408 with stronger authentications: we *first* transmit the
1409 username and the password in clear text, and *then* attempt a
1410 stronger authentication scheme. That cannot be right! We
1411 are only fortunate that almost everyone still uses the
1412 `Basic' scheme anyway.
1414 There should be an option to prevent this from happening, for
1415 those who use strong authentication schemes and value their
1417 request_set_header (req, "Authorization",
1418 basic_authentication_encode (user, passwd),
1425 char *proxy_user, *proxy_passwd;
1426 /* For normal username and password, URL components override
1427 command-line/wgetrc parameters. With proxy
1428 authentication, it's the reverse, because proxy URLs are
1429 normally the "permanent" ones, so command-line args
1430 should take precedence. */
1431 if (opt.proxy_user && opt.proxy_passwd)
1433 proxy_user = opt.proxy_user;
1434 proxy_passwd = opt.proxy_passwd;
1438 proxy_user = proxy->user;
1439 proxy_passwd = proxy->passwd;
1441 /* #### This does not appear right. Can't the proxy request,
1442 say, `Digest' authentication? */
1443 if (proxy_user && proxy_passwd)
1444 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1446 /* If we're using a proxy, we will be connecting to the proxy
1450 /* Proxy authorization over SSL is handled below. */
1452 if (u->scheme != SCHEME_HTTPS)
1454 request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1457 /* Generate the Host header, HOST:PORT. Take into account that:
1459 - Broken server-side software often doesn't recognize the PORT
1460 argument, so we must generate "Host: www.server.com" instead of
1461 "Host: www.server.com:80" (and likewise for https port).
1463 - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
1464 becomes ambiguous and needs to be rewritten as "Host:
1465 [3ffe:8100:200:2::2]:1234". */
1467 /* Formats arranged for hfmt[add_port][add_squares]. */
1468 static const char *hfmt[][2] = {
1469 { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
1471 int add_port = u->port != scheme_default_port (u->scheme);
1472 int add_squares = strchr (u->host, ':') != NULL;
1473 request_set_header (req, "Host",
1474 aprintf (hfmt[add_port][add_squares], u->host, u->port),
1478 if (!inhibit_keep_alive)
1479 request_set_header (req, "Connection", "Keep-Alive", rel_none);
1482 request_set_header (req, "Cookie",
1483 cookie_header (wget_cookie_jar,
1484 u->host, u->port, u->path,
1486 u->scheme == SCHEME_HTTPS
1493 if (opt.post_data || opt.post_file_name)
1495 request_set_header (req, "Content-Type",
1496 "application/x-www-form-urlencoded", rel_none);
1498 post_data_size = strlen (opt.post_data);
1501 post_data_size = file_size (opt.post_file_name);
1502 if (post_data_size == -1)
1504 logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
1505 opt.post_file_name, strerror (errno));
1509 request_set_header (req, "Content-Length",
1510 xstrdup (number_to_static_string (post_data_size)),
1514 /* Add the user headers. */
1515 if (opt.user_headers)
1518 for (i = 0; opt.user_headers[i]; i++)
1519 request_set_user_header (req, opt.user_headers[i]);
1523 /* We need to come back here when the initial attempt to retrieve
1524 without authorization header fails. (Expected to happen at least
1525 for the Digest authorization scheme.) */
1529 /* Establish the connection. */
1531 if (!inhibit_keep_alive)
1533 /* Look for a persistent connection to target host, unless a
1534 proxy is used. The exception is when SSL is in use, in which
1535 case the proxy is nothing but a passthrough to the target
1536 host, registered as a connection to the latter. */
1537 struct url *relevant = conn;
1539 if (u->scheme == SCHEME_HTTPS)
1543 if (persistent_available_p (relevant->host, relevant->port,
1545 relevant->scheme == SCHEME_HTTPS,
1549 &host_lookup_failed))
1551 sock = pconn.socket;
1552 using_ssl = pconn.ssl;
1553 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1554 escnonprint (pconn.host), pconn.port);
1555 DEBUGP (("Reusing fd %d.\n", sock));
1556 if (pconn.authorized)
1557 /* If the connection is already authorized, the "Basic"
1558 authorization added by code above is unnecessary and
1560 request_remove_header (req, "Authorization");
1566 /* In its current implementation, persistent_available_p will
1567 look up conn->host in some cases. If that lookup failed, we
1568 don't need to bother with connect_to_host. */
1569 if (host_lookup_failed)
1575 sock = connect_to_host (conn->host, conn->port);
1584 return (retryable_socket_connect_error (errno)
1585 ? CONERROR : CONIMPOSSIBLE);
1589 if (proxy && u->scheme == SCHEME_HTTPS)
1591 /* When requesting SSL URLs through proxies, use the
1592 CONNECT method to request passthrough. */
1593 struct request *connreq = request_new ();
1594 request_set_method (connreq, "CONNECT",
1595 aprintf ("%s:%d", u->host, u->port));
1596 SET_USER_AGENT (connreq);
1599 request_set_header (connreq, "Proxy-Authorization",
1600 proxyauth, rel_value);
1601 /* Now that PROXYAUTH is part of the CONNECT request,
1602 zero it out so we don't send proxy authorization with
1603 the regular request below. */
1606 /* Examples in rfc2817 use the Host header in CONNECT
1607 requests. I don't see how that gains anything, given
1608 that the contents of Host would be exactly the same as
1609 the contents of CONNECT. */
1611 write_error = request_send (connreq, sock);
1612 request_free (connreq);
1613 if (write_error < 0)
1615 CLOSE_INVALIDATE (sock);
1619 head = read_http_response_head (sock);
1622 logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1624 CLOSE_INVALIDATE (sock);
1633 DEBUGP (("proxy responded with: [%s]\n", head));
1635 resp = resp_new (head);
1636 statcode = resp_status (resp, &message);
1639 if (statcode != 200)
1642 logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1643 message ? escnonprint (message) : "?");
1644 xfree_null (message);
1647 xfree_null (message);
1649 /* SOCK is now *really* connected to u->host, so update CONN
1650 to reflect this. That way register_persistent will
1651 register SOCK as being connected to u->host:u->port. */
1655 if (conn->scheme == SCHEME_HTTPS)
1657 if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1664 #endif /* HAVE_SSL */
1667 /* Send the request to server. */
1668 write_error = request_send (req, sock);
1670 if (write_error >= 0)
1674 DEBUGP (("[POST data: %s]\n", opt.post_data));
1675 write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1677 else if (opt.post_file_name && post_data_size != 0)
1678 write_error = post_file (sock, opt.post_file_name, post_data_size);
1681 if (write_error < 0)
1683 CLOSE_INVALIDATE (sock);
1687 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1688 proxy ? "Proxy" : "HTTP");
1693 head = read_http_response_head (sock);
1698 logputs (LOG_NOTQUIET, _("No data received.\n"));
1699 CLOSE_INVALIDATE (sock);
1705 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1707 CLOSE_INVALIDATE (sock);
1712 DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1714 resp = resp_new (head);
1716 /* Check for status line. */
1718 statcode = resp_status (resp, &message);
1719 if (!opt.server_response)
1720 logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1721 message ? escnonprint (message) : "");
1724 logprintf (LOG_VERBOSE, "\n");
1725 print_server_response (resp, " ");
1728 /* Determine the local filename if needed. Notice that if -O is used
1729 * hstat.local_file is set by http_loop to the argument of -O. */
1730 if (!hs->local_file)
1732 /* Honor Content-Disposition whether possible. */
1733 if (!opt.content_disposition
1734 || !resp_header_copy (resp, "Content-Disposition",
1735 hdrval, sizeof (hdrval))
1736 || !parse_content_disposition (hdrval, &hs->local_file))
1738 /* The Content-Disposition header is missing or broken.
1739 * Choose unique file name according to given URL. */
1740 hs->local_file = url_file_name (u);
1744 /* TODO: perform this check only once. */
1745 if (file_exists_p (hs->local_file))
1749 /* If opt.noclobber is turned on and file already exists, do not
1750 retrieve the file */
1751 logprintf (LOG_VERBOSE, _("\
1752 File `%s' already there; not retrieving.\n\n"), hs->local_file);
1753 /* If the file is there, we suppose it's retrieved OK. */
1756 /* #### Bogusness alert. */
1757 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1758 if (has_html_suffix_p (hs->local_file))
1763 else if (!ALLOW_CLOBBER)
1765 char *unique = unique_name (hs->local_file, true);
1766 if (unique != hs->local_file)
1767 xfree (hs->local_file);
1768 hs->local_file = unique;
1772 /* Support timestamping */
1773 /* TODO: move this code out of gethttp. */
1774 if (opt.timestamping && !hs->timestamp_checked)
1776 size_t filename_len = strlen (hs->local_file);
1777 char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1778 bool local_dot_orig_file_exists = false;
1779 char *local_filename = NULL;
1782 if (opt.backup_converted)
1783 /* If -K is specified, we'll act on the assumption that it was specified
1784 last time these files were downloaded as well, and instead of just
1785 comparing local file X against server file X, we'll compare local
1786 file X.orig (if extant, else X) against server file X. If -K
1787 _wasn't_ specified last time, or the server contains files called
1788 *.orig, -N will be back to not operating correctly with -k. */
1790 /* Would a single s[n]printf() call be faster? --dan
1792 Definitely not. sprintf() is horribly slow. It's a
1793 different question whether the difference between the two
1794 affects a program. Usually I'd say "no", but at one
1795 point I profiled Wget, and found that a measurable and
1796 non-negligible amount of time was lost calling sprintf()
1797 in url.c. Replacing sprintf with inline calls to
1798 strcpy() and number_to_string() made a difference.
1800 memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
1801 memcpy (filename_plus_orig_suffix + filename_len,
1802 ".orig", sizeof (".orig"));
1804 /* Try to stat() the .orig file. */
1805 if (stat (filename_plus_orig_suffix, &st) == 0)
1807 local_dot_orig_file_exists = true;
1808 local_filename = filename_plus_orig_suffix;
1812 if (!local_dot_orig_file_exists)
1813 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1814 if (stat (hs->local_file, &st) == 0)
1815 local_filename = hs->local_file;
1817 if (local_filename != NULL)
1818 /* There was a local file, so we'll check later to see if the version
1819 the server has is the same version we already have, allowing us to
1822 hs->orig_file_name = xstrdup (local_filename);
1823 hs->orig_file_size = st.st_size;
1824 hs->orig_file_tstamp = st.st_mtime;
1826 /* Modification time granularity is 2 seconds for Windows, so
1827 increase local time by 1 second for later comparison. */
1828 ++hs->orig_file_tstamp;
1833 if (!opt.ignore_length
1834 && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1838 parsed = str_to_wgint (hdrval, NULL, 10);
1839 if (parsed == WGINT_MAX && errno == ERANGE)
1841 #### If Content-Length is out of range, it most likely
1842 means that the file is larger than 2G and that we're
1843 compiled without LFS. In that case we should probably
1844 refuse to even attempt to download the file. */
1850 /* Check for keep-alive related responses. */
1851 if (!inhibit_keep_alive && contlen != -1)
1853 if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1855 else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1857 if (0 == strcasecmp (hdrval, "Keep-Alive"))
1862 /* The server has promised that it will not close the connection
1863 when we're done. This means that we can register it. */
1864 register_persistent (conn->host, conn->port, sock, using_ssl);
1866 if (statcode == HTTP_STATUS_UNAUTHORIZED)
1868 /* Authorization is required. */
1869 if (keep_alive && !head_only && skip_short_body (sock, contlen))
1870 CLOSE_FINISH (sock);
1872 CLOSE_INVALIDATE (sock);
1873 pconn.authorized = false;
1874 if (!auth_finished && (user && passwd))
1876 /* IIS sends multiple copies of WWW-Authenticate, one with
1877 the value "negotiate", and other(s) with data. Loop over
1878 all the occurrences and pick the one we recognize. */
1880 const char *wabeg, *waend;
1881 char *www_authenticate = NULL;
1883 (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1884 &wabeg, &waend)) != -1;
1886 if (known_authentication_scheme_p (wabeg, waend))
1888 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1892 if (!www_authenticate)
1893 /* If the authentication header is missing or
1894 unrecognized, there's no sense in retrying. */
1895 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1896 else if (BEGINS_WITH (www_authenticate, "Basic"))
1897 /* If the authentication scheme is "Basic", which we send
1898 by default, there's no sense in retrying either. (This
1899 should be changed when we stop sending "Basic" data by
1905 pth = url_full_path (u);
1906 request_set_header (req, "Authorization",
1907 create_authorization_line (www_authenticate,
1909 request_method (req),
1913 if (BEGINS_WITH (www_authenticate, "NTLM"))
1916 goto retry_with_auth;
1919 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1923 else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1925 /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1927 pconn.authorized = true;
1931 hs->statcode = statcode;
1933 hs->error = xstrdup (_("Malformed status line"));
1935 hs->error = xstrdup (_("(no description)"));
1937 hs->error = xstrdup (message);
1938 xfree_null (message);
1940 type = resp_header_strdup (resp, "Content-Type");
1943 char *tmp = strchr (type, ';');
1946 while (tmp > type && ISSPACE (tmp[-1]))
1951 hs->newloc = resp_header_strdup (resp, "Location");
1952 hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1954 /* Handle (possibly multiple instances of) the Set-Cookie header. */
1958 const char *scbeg, *scend;
1959 /* The jar should have been created by now. */
1960 assert (wget_cookie_jar != NULL);
1962 (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1963 &scbeg, &scend)) != -1;
1966 char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1967 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
1968 u->path, set_cookie);
1972 if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1974 wgint first_byte_pos, last_byte_pos, entity_length;
1975 if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1977 contrange = first_byte_pos;
1981 /* 20x responses are counted among successful by default. */
1982 if (H_20X (statcode))
1985 /* Return if redirected. */
1986 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1988 /* RFC2068 says that in case of the 300 (multiple choices)
1989 response, the server can output a preferred URL through
1990 `Location' header; otherwise, the request should be treated
1991 like GET. So, if the location is set, it will be a
1992 redirection; otherwise, just proceed normally. */
1993 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1997 logprintf (LOG_VERBOSE,
1998 _("Location: %s%s\n"),
1999 hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
2000 hs->newloc ? _(" [following]") : "");
2001 if (keep_alive && !head_only && skip_short_body (sock, contlen))
2002 CLOSE_FINISH (sock);
2004 CLOSE_INVALIDATE (sock);
2010 /* If content-type is not given, assume text/html. This is because
2011 of the multitude of broken CGI's that "forget" to generate the
2014 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
2015 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
2020 if (opt.html_extension && (*dt & TEXTHTML))
2021 /* -E / --html-extension / html_extension = on was specified, and this is a
2022 text/html file. If some case-insensitive variation on ".htm[l]" isn't
2023 already the file's suffix, tack on ".html". */
2025 char *last_period_in_local_filename = strrchr (hs->local_file, '.');
2027 if (last_period_in_local_filename == NULL
2028 || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
2029 || 0 == strcasecmp (last_period_in_local_filename, ".html")))
2031 int local_filename_len = strlen (hs->local_file);
2032 /* Resize the local file, allowing for ".html" preceded by
2033 optional ".NUMBER". */
2034 hs->local_file = xrealloc (hs->local_file,
2035 local_filename_len + 24 + sizeof (".html"));
2036 strcpy(hs->local_file + local_filename_len, ".html");
2037 /* If clobbering is not allowed and the file, as named,
2038 exists, tack on ".NUMBER.html" instead. */
2039 if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
2043 sprintf (hs->local_file + local_filename_len,
2044 ".%d.html", ext_num++);
2045 while (file_exists_p (hs->local_file));
2047 *dt |= ADDED_HTML_EXTENSION;
2051 if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
2053 /* If `-c' is in use and the file has been fully downloaded (or
2054 the remote file has shrunk), Wget effectively requests bytes
2055 after the end of file and the server response with 416. */
2056 logputs (LOG_VERBOSE, _("\
2057 \n The file is already fully retrieved; nothing to do.\n\n"));
2058 /* In case the caller inspects. */
2061 /* Mark as successfully retrieved. */
2064 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
2065 might be more bytes in the body. */
2066 return RETRUNNEEDED;
2068 if ((contrange != 0 && contrange != hs->restval)
2069 || (H_PARTIAL (statcode) && !contrange))
2071 /* The Range request was somehow misunderstood by the server.
2074 CLOSE_INVALIDATE (sock);
2077 hs->contlen = contlen + contrange;
2083 /* No need to print this output if the body won't be
2084 downloaded at all, or if the original server response is
2086 logputs (LOG_VERBOSE, _("Length: "));
2089 logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
2090 if (contlen + contrange >= 1024)
2091 logprintf (LOG_VERBOSE, " (%s)",
2092 human_readable (contlen + contrange));
2095 if (contlen >= 1024)
2096 logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
2097 number_to_static_string (contlen),
2098 human_readable (contlen));
2100 logprintf (LOG_VERBOSE, _(", %s remaining"),
2101 number_to_static_string (contlen));
2105 logputs (LOG_VERBOSE,
2106 opt.ignore_length ? _("ignored") : _("unspecified"));
2108 logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
2110 logputs (LOG_VERBOSE, "\n");
2114 type = NULL; /* We don't need it any more. */
2116 /* Return if we have no intention of further downloading. */
2117 if (!(*dt & RETROKF) || head_only)
2119 /* In case the caller cares to look... */
2124 /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
2125 servers not to send body in response to a HEAD request, and
2126 those that do will likely be caught by test_socket_open.
2127 If not, they can be worked around using
2128 `--no-http-keep-alive'. */
2129 CLOSE_FINISH (sock);
2130 else if (keep_alive && skip_short_body (sock, contlen))
2131 /* Successfully skipped the body; also keep using the socket. */
2132 CLOSE_FINISH (sock);
2134 CLOSE_INVALIDATE (sock);
2135 return RETRFINISHED;
2138 /* Open the local file. */
2141 mkalldirs (hs->local_file);
2143 rotate_backups (hs->local_file);
2145 fp = fopen (hs->local_file, "ab");
2146 else if (ALLOW_CLOBBER)
2147 fp = fopen (hs->local_file, "wb");
2150 fp = fopen_excl (hs->local_file, true);
2151 if (!fp && errno == EEXIST)
2153 /* We cannot just invent a new name and use it (which is
2154 what functions like unique_create typically do)
2155 because we told the user we'd use this name.
2156 Instead, return and retry the download. */
2157 logprintf (LOG_NOTQUIET,
2158 _("%s has sprung into existence.\n"),
2160 CLOSE_INVALIDATE (sock);
2161 return FOPEN_EXCL_ERR;
2166 logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
2167 CLOSE_INVALIDATE (sock);
2174 /* Print fetch message, if opt.verbose. */
2177 logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
2178 HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
2181 /* This confuses the timestamping code that checks for file size.
2182 #### The timestamping code should be smarter about file size. */
2183 if (opt.save_headers && hs->restval == 0)
2184 fwrite (head, 1, strlen (head), fp);
2186 /* Now we no longer need to store the response header. */
2189 /* Download the request body. */
2192 /* If content-length is present, read that much; otherwise, read
2193 until EOF. The HTTP spec doesn't require the server to
2194 actually close the connection when it's done sending data. */
2195 flags |= rb_read_exactly;
2196 if (hs->restval > 0 && contrange == 0)
2197 /* If the server ignored our range request, instruct fd_read_body
2198 to skip the first RESTVAL bytes of body. */
2199 flags |= rb_skip_startpos;
2200 hs->len = hs->restval;
2202 hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
2203 hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
2207 CLOSE_FINISH (sock);
2211 hs->rderrmsg = xstrdup (fd_errstr (sock));
2212 CLOSE_INVALIDATE (sock);
2219 return RETRFINISHED;
2222 /* The genuine HTTP loop! This is the part where the retrieval is
2223 retried, and retried, and retried, and... */
/* Parameters, as used by the code visible below:

   U          - the URL to retrieve; passed to gethttp on every pass.
   NEWLOC     - receives a copy (xstrdup) of hstat.newloc, i.e. the
                Location reported by the server.
   LOCAL_FILE - may be NULL; otherwise *LOCAL_FILE must be NULL on entry
                (asserted).  It receives a copy of opt.output_document
                (or NULL when HYPHENP is true for it) and, further down,
                a copy of hstat.local_file.
   REFERER    - stored in hstat.referer for gethttp to send.
   DT         - document-type bit flags shared with gethttp; SEND_NOCACHE
                is set or cleared here before each attempt.
   PROXY      - passed through to gethttp; when set, attempts after the
                first request no-cache.

   The retry loop keeps going while opt.ntry is zero (no limit) or COUNT
   is still below opt.ntry.  RET starts out as TRYLIMEXC, presumably the
   value reported when the tries run out -- the return statements are not
   visible in this listing, so confirm.

   NOTE(review): this listing appears to have lost lines (the return-type
   line, braces, the switch header, several short statements and some
   comment terminators), so the control flow below must be read with
   care.  */
2225 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
2226 int *dt, struct url *proxy)
2229 bool got_head = false; /* used for time-stamping and filename detection */
2230 bool got_name = false;
2233 uerr_t err, ret = TRYLIMEXC;
2234 time_t tmr = -1; /* remote time-stamp */
2235 wgint local_size = 0; /* the size of the local file */
2236 struct http_stat hstat; /* HTTP status */
2239 /* Assert that no value for *LOCAL_FILE was passed. */
2240 assert (local_file == NULL || *local_file == NULL);
2242 /* Set LOCAL_FILE parameter. */
2243 if (local_file && opt.output_document)
2244 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2246 /* Reset NEWLOC parameter. */
2249 /* This used to be done in main(), but it's a better idea to do it
2250 here so that we don't go through the hoops if we're just using
2255 /* Warn on (likely bogus) wildcard usage in HTTP. */
2256 if (opt.ftp_glob && has_wildcards_p (u->path))
2257 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2259 /* Setup hstat struct. */
2261 hstat.referer = referer;
/* With an explicit output document the local name is fixed up front;
   gethttp only chooses a name itself when hs->local_file is still
   unset.  */
2263 if (opt.output_document)
2265 hstat.local_file = xstrdup (opt.output_document);
2269 /* Reset the counter. */
2272 /* Reset the document type. */
2278 /* Increment the pass counter. */
2280 sleep_between_retrievals (count);
2282 /* Get the current time string. */
2283 tms = time_str (time (NULL));
2285 if (opt.spider && !got_head)
2286 logprintf (LOG_VERBOSE, _("\
2287 Spider mode enabled. Check if remote file exists.\n"));
2289 /* Print fetch message, if opt.verbose. */
2292 char *hurl = url_string (u, true);
2297 sprintf (tmp, _("(try:%2d)"), count);
2298 logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
2303 logprintf (LOG_NOTQUIET, "--%s-- %s\n",
2308 ws_changetitle (hurl);
2313 /* Default document type is empty. However, if spider mode is
2314 on or time-stamping is employed, HEAD_ONLY commands is
2315 encoded within *dt. */
2316 if (((opt.spider || opt.timestamping) && !got_head)
2317 || (opt.always_rest && !got_name))
2322 /* Decide whether or not to restart. */
2325 && stat (hstat.local_file, &st) == 0
2326 && S_ISREG (st.st_mode))
2327 /* When -c is used, continue from on-disk size. (Can't use
2328 hstat.len even if count>1 because we don't want a failed
2329 first attempt to clobber existing data.) */
2330 hstat.restval = st.st_size;
2332 /* otherwise, continue where the previous try left off */
2333 hstat.restval = hstat.len;
2337 /* Decide whether to send the no-cache directive. We send it in
2339 a) we're using a proxy, and we're past our first retrieval.
2340 Some proxies are notorious for caching incomplete data, so
2341 we require a fresh get.
2342 b) caching is explicitly inhibited. */
2343 if ((proxy && count > 1) /* a */
2344 || !opt.allow_cache) /* b */
2345 *dt |= SEND_NOCACHE;
2347 *dt &= ~SEND_NOCACHE;
2349 /* Try fetching the document, or at least its head. */
2350 err = gethttp (u, &hstat, dt, proxy);
2353 tms = time_str (time (NULL));
2355 /* Get the new location (with or without the redirection). */
2357 *newloc = xstrdup (hstat.newloc);
2361 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2362 case CONERROR: case READERR: case WRITEFAILED:
2363 case RANGEERR: case FOPEN_EXCL_ERR:
2364 /* Non-fatal errors continue executing the loop, which will
2365 bring them to "while" statement at the end, to judge
2366 whether the number of tries was exceeded. */
2367 printwhat (count, opt.ntry);
2369 case FWRITEERR: case FOPENERR:
2370 /* Another fatal error. */
2371 logputs (LOG_VERBOSE, "\n");
2372 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2373 hstat.local_file, strerror (errno));
2374 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2375 case SSLINITFAILED: case CONTNOTSUPPORTED:
2376 /* Fatal errors just return from the function. */
2380 /* Another fatal error. */
2381 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2385 /* Return the new location to the caller. */
2388 logprintf (LOG_NOTQUIET,
2389 _("ERROR: Redirection (%d) without location.\n"),
2399 /* The file was already fully retrieved. */
2403 /* Deal with you later. */
2406 /* All possibilities should have been exhausted. */
2410 if (!(*dt & RETROKF))
2415 /* #### Ugly ugly ugly! */
2416 hurl = url_string (u, true);
2417 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2419 /* Maybe we should always keep track of broken links, not just in
2423 /* #### Again: ugly ugly ugly! */
2425 hurl = url_string (u, true);
2426 nonexisting_url (hurl);
2427 logprintf (LOG_NOTQUIET, _("\
2428 Remote file does not exist -- broken link!!!\n"));
2432 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2433 tms, hstat.statcode, escnonprint (hstat.error));
2435 logputs (LOG_VERBOSE, "\n");
2441 /* Did we get the time-stamp? */
2444 bool restart_loop = false;
2446 if (opt.timestamping && !hstat.remote_time)
2448 logputs (LOG_NOTQUIET, _("\
2449 Last-modified header missing -- time-stamps turned off.\n"));
2451 else if (hstat.remote_time)
2453 /* Convert the date-string into struct tm. */
2454 tmr = http_atotm (hstat.remote_time);
2455 if (tmr == (time_t) (-1))
2456 logputs (LOG_VERBOSE, _("\
2457 Last-modified header invalid -- time-stamp ignored.\n"));
2460 /* The time-stamping section. */
2461 if (opt.timestamping)
2463 if (hstat.orig_file_name) /* Perform the following checks only
2464 if the file we're supposed to
2465 download already exists. */
2467 if (hstat.remote_time &&
2468 tmr != (time_t) (-1))
2470 /* Now time-stamping can be used validly. Time-stamping
2471 means that if the sizes of the local and remote file
2472 match, and local file is newer than the remote file,
2473 it will not be retrieved. Otherwise, the normal
2474 download procedure is resumed. */
2475 if (hstat.orig_file_tstamp >= tmr)
2477 if (hstat.contlen == -1
2478 || hstat.orig_file_size == hstat.contlen)
2480 logprintf (LOG_VERBOSE, _("\
2481 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2482 hstat.orig_file_name);
2488 logprintf (LOG_VERBOSE, _("\
2489 The sizes do not match (local %s) -- retrieving.\n"),
2490 number_to_static_string (local_size));
2494 logputs (LOG_VERBOSE,
2495 _("Remote file is newer, retrieving.\n"));
2497 logputs (LOG_VERBOSE, "\n");
2501 /* free_hstat (&hstat); */
2502 hstat.timestamp_checked = true;
2503 restart_loop = true;
2506 if (opt.always_rest)
2509 restart_loop = true;
2518 logputs (LOG_VERBOSE, _("\
2519 Remote file exists and could contain links to other resources -- retrieving.\n\n"));
2520 restart_loop = true;
2524 logprintf (LOG_VERBOSE, _("\
2525 Remote file exists but does not contain any link -- not retrieving.\n\n"));
2532 logprintf (LOG_VERBOSE, _("\
2533 Remote file exists but recursion is disabled -- not retrieving.\n\n"));
2539 got_head = true; /* no more time-stamping */
2541 count = 0; /* the retrieve count for HEAD is reset */
2547 if ((tmr != (time_t) (-1))
2548 && ((hstat.len == hstat.contlen) ||
2549 ((hstat.res == 0) && (hstat.contlen == -1))))
2551 /* #### This code repeats in http.c and ftp.c. Move it to a
2553 const char *fl = NULL;
2554 if (opt.output_document)
2556 if (output_stream_regular)
2557 fl = opt.output_document;
2560 fl = hstat.local_file;
2564 /* End of time-stamping section. */
2566 tmrate = retr_rate (hstat.rd_size, hstat.dltime);
2567 total_download_time += hstat.dltime;
2569 if (hstat.len == hstat.contlen)
2573 logprintf (LOG_VERBOSE,
2574 _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2575 tms, tmrate, hstat.local_file,
2576 number_to_static_string (hstat.len),
2577 number_to_static_string (hstat.contlen));
2578 logprintf (LOG_NONVERBOSE,
2579 "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2581 number_to_static_string (hstat.len),
2582 number_to_static_string (hstat.contlen),
2583 hstat.local_file, count);
2586 total_downloaded_bytes += hstat.len;
2588 /* Remember that we downloaded the file for later ".orig" code. */
2589 if (*dt & ADDED_HTML_EXTENSION)
2590 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2592 downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2597 else if (hstat.res == 0) /* No read error */
2599 if (hstat.contlen == -1) /* We don't know how much we were supposed
2600 to get, so assume we succeeded. */
2604 logprintf (LOG_VERBOSE,
2605 _("%s (%s) - `%s' saved [%s]\n\n"),
2606 tms, tmrate, hstat.local_file,
2607 number_to_static_string (hstat.len));
2608 logprintf (LOG_NONVERBOSE,
2609 "%s URL:%s [%s] -> \"%s\" [%d]\n",
2610 tms, u->url, number_to_static_string (hstat.len),
2611 hstat.local_file, count);
2614 total_downloaded_bytes += hstat.len;
2616 /* Remember that we downloaded the file for later ".orig" code. */
2617 if (*dt & ADDED_HTML_EXTENSION)
2618 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2620 downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2625 else if (hstat.len < hstat.contlen) /* meaning we lost the
2626 connection too soon */
2628 logprintf (LOG_VERBOSE,
2629 _("%s (%s) - Connection closed at byte %s. "),
2630 tms, tmrate, number_to_static_string (hstat.len));
2631 printwhat (count, opt.ntry);
2635 /* Getting here would mean reading more data than
2636 requested with content-length, which we never do. */
2639 else /* from now on hstat.res can only be -1 */
2641 if (hstat.contlen == -1)
2643 logprintf (LOG_VERBOSE,
2644 _("%s (%s) - Read error at byte %s (%s)."),
2645 tms, tmrate, number_to_static_string (hstat.len),
2647 printwhat (count, opt.ntry);
2650 else /* hstat.res == -1 and contlen is given */
2652 logprintf (LOG_VERBOSE,
2653 _("%s (%s) - Read error at byte %s/%s (%s). "),
2655 number_to_static_string (hstat.len),
2656 number_to_static_string (hstat.contlen),
2658 printwhat (count, opt.ntry);
2664 while (!opt.ntry || (count < opt.ntry));
2668 *local_file = xstrdup (hstat.local_file);
2669 free_hstat (&hstat);
2674 /* Check whether the result of strptime() indicates success.
2675 strptime() returns the pointer to how far it got to in the string.
2676 The processing has been successful if the string is at `GMT', at
2677 a `+' or `-' sign followed by a digit, or at the end of the string.
   Only the sign and the first digit of a numeric zone are examined;
   the rest of the zone is not validated here.
2679 In extended regexp parlance, the function returns 1 if P matches
2680 "^ *(GMT|[+-][0-9]|$)", 0 otherwise. P being NULL (which strptime
2681 can return) is considered a failure and 0 is returned. */
2683 check_end (const char *p)
/* Whitespace between the parsed date and the terminator is tolerated. */
2687 while (ISSPACE (*p))
2690 || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2691 || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2697 /* Convert the textual specification of time in TIME_STRING to the
2698 number of seconds since the Epoch.
2700 TIME_STRING can be in any of the three formats RFC2616 allows the
2701 HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
2702 as well as the time format used in the Set-Cookie header.
2703 Timezones are ignored, and should be GMT.
2705 Return the computed time_t representation, or -1 if the conversion fails.
2708 This function uses strptime with various string formats for parsing
2709 TIME_STRING. This results in a parser that is not as lenient in
2710 interpreting TIME_STRING as I would like it to be. Being based on
2711 strptime, it always allows shortened months, one-digit days, etc.,
2712 but due to the multitude of formats in which time can be
2713 represented, an ideal HTTP time parser would be even more
2714 forgiving. It should completely ignore things like week days and
2715 concentrate only on the various forms of representing years,
2716 months, days, hours, minutes, and seconds. For example, it would
2717 be nice if it accepted ISO 8601 out of the box.
2719 I've investigated free and PD code for this purpose, but none was
2720 usable. getdate was big and unwieldy, and had potential copyright
2721 issues, or so I was informed. Dr. Marcus Hennecke's atotm(),
2722 distributed with phttpd, is excellent, but we cannot use it because
2723 it is not assigned to the FSF. So I stuck with strptime. */
2726 http_atotm (const char *time_string)
2728 /* NOTE: Solaris strptime man page claims that %n and %t match white
2729 space, but that's not universally available. Instead, we simply
2730 use ` ' to mean "skip all WS", which works under all strptime
2731 implementations I've tested. */
/* The loop below walks this table in order, handing each format to
   strptime and checking the result with check_end. */
2733 static const char *time_formats[] = {
2734 "%a, %d %b %Y %T", /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
2735 "%A, %d-%b-%y %T", /* rfc850: Thursday, 29-Jan-98 22:12:57 */
2736 "%a %b %d %T %Y", /* asctime: Thu Jan 29 22:12:57 1998 */
2737 "%a, %d-%b-%Y %T" /* cookies: Thu, 29-Jan-1998 22:12:57
2738 (used in Set-Cookie, defined in the
2739 Netscape cookie specification.) */
2741 const char *oldlocale;
/* Starts out as the documented failure value. */
2743 time_t ret = (time_t) -1;
2745 /* Solaris strptime fails to recognize English month names in
2746 non-English locales, which we work around by temporarily setting
2747 locale to C before invoking strptime. */
/* NOTE(review): ISO C allows the string returned by setlocale to be
   overwritten by a later setlocale call, so OLDLOCALE may no longer
   name the original locale when it is restored below.  It should
   probably be copied before switching to "C" -- confirm and fix. */
2748 oldlocale = setlocale (LC_TIME, NULL);
2749 setlocale (LC_TIME, "C");
2751 for (i = 0; i < countof (time_formats); i++)
2755 /* Some versions of strptime use the existing contents of struct
2756 tm to recalculate the date according to format. Zero it out
2757 to prevent stack garbage from influencing strptime. */
2760 if (check_end (strptime (time_string, time_formats[i], &t)))
2767 /* Restore the previous locale. */
2768 setlocale (LC_TIME, oldlocale);
2773 /* Authorization support: We support three authorization schemes:
2775 * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
2777 * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
2778 consisting of answering the server's challenge with the proper MD5 digests;
2781 * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
2782 Stenberg for libcurl. Like digest, NTLM is based on a
2783 challenge-response mechanism, but unlike digest, it is non-standard
2784 (authenticates TCP connections rather than requests), undocumented
2785 and Microsoft-specific. */
2787 /* Create the authentication header contents for the `Basic' scheme.
2788 This is done by encoding the string "USER:PASS" to base64 and
2789 prepending the string "Basic " in front of it.
   USER and PASSWD must be non-NULL, since both are passed to strlen.
   The return value is whatever concat_strings produces; presumably a
   freshly allocated string owned by the caller -- TODO confirm against
   concat_strings. */
2792 basic_authentication_encode (const char *user, const char *passwd)
/* LEN1 is the length of "USER:PASSWD" without the terminating zero. */
2795 int len1 = strlen (user) + 1 + strlen (passwd);
/* Both temporaries come from alloca, so nothing is freed explicitly.
   NOTE(review): stack use grows with the credential length, and LEN1
   is an int while strlen returns size_t -- confirm that callers bound
   the length of USER and PASSWD. */
2797 t1 = (char *)alloca (len1 + 1);
2798 sprintf (t1, "%s:%s", user, passwd);
2800 t2 = (char *)alloca (BASE64_LENGTH (len1) + 1);
2801 base64_encode (t1, len1, t2);
2803 return concat_strings ("Basic ", t2, (char *) 0);
/* Loop while X points at a whitespace character.  Only the loop
   condition is visible in this excerpt; the body presumably advances
   X -- TODO confirm.  X is evaluated on every iteration, so it must be
   a side-effect-free lvalue. */
2806 #define SKIP_WS(x) do { \
2807 while (ISSPACE (*(x))) \
2811 #ifdef ENABLE_DIGEST
2812 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2813 an array of 16 bytes containing the hash keys, and BUF should be a
2814 buffer of 33 writable characters (32 for hex digits plus one for
2815 zero termination).
   In terms of the code: the loop reads MD5_HASHLEN bytes and writes
   two characters for each, and the callers in this file declare their
   buffers as MD5_HASHLEN * 2 + 1 characters. */
2817 dump_hash (char *buf, const unsigned char *hash)
/* Each byte is emitted as two digits, high nibble first. */
2821 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2823 *buf++ = XNUM_TO_digit (*hash >> 4);
2824 *buf++ = XNUM_TO_digit (*hash & 0xf);
2829 /* Take the line apart to find the challenge, and compose a digest
2830 authorization header. See RFC2069 section 2.1.2.

   AU is the challenge text, beginning with the scheme name `Digest';
   the parameters after it are split on commas and matched against the
   names in the options table.  USER, PASSWD, METHOD and PATH supply
   the remaining hash inputs.  If any of them, or the realm or nonce
   from the challenge, is missing, the cleanup branch below is taken
   instead of computing a digest (its return value is not visible in
   this excerpt).  The header text is built in a buffer obtained from
   xmalloc. */
2832 digest_authentication_encode (const char *au, const char *user,
2833 const char *passwd, const char *method,
/* Function-level statics, reset to NULL on every call below.
   NOTE(review): presumably static so that their addresses can appear
   in the options table initializer; as a consequence the function
   keeps state between calls and is not reentrant -- confirm. */
2836 static char *realm, *opaque, *nonce;
2841 { "realm", &realm },
2842 { "opaque", &opaque },
2846 param_token name, value;
2848 realm = opaque = nonce = NULL;
2850 au += 6; /* skip over `Digest' */
2851 while (extract_param (&au, &name, &value, ','))
/* A parameter is recognized only when its name has the same length
   and the same bytes as a table entry (strncmp, so case-sensitive).
   The value is copied with strdupdelim into the matching variable. */
2854 for (i = 0; i < countof (options); i++)
2855 if (name.e - name.b == strlen (options[i].name)
2856 && 0 == strncmp (name.b, options[i].name, name.e - name.b))
2858 *options[i].variable = strdupdelim (value.b, value.e);
2862 if (!realm || !nonce || !user || !passwd || !path || !method)
2865 xfree_null (opaque);
2870 /* Calculate the digest value. */
2872 ALLOCA_MD5_CONTEXT (ctx);
2873 unsigned char hash[MD5_HASHLEN];
2874 char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2875 char response_digest[MD5_HASHLEN * 2 + 1];
2877 /* A1BUF = H(user ":" realm ":" password) */
2879 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2880 gen_md5_update ((unsigned char *)":", 1, ctx);
2881 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2882 gen_md5_update ((unsigned char *)":", 1, ctx);
2883 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2884 gen_md5_finish (ctx, hash);
2885 dump_hash (a1buf, hash);
2887 /* A2BUF = H(method ":" path) */
2889 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2890 gen_md5_update ((unsigned char *)":", 1, ctx);
2891 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2892 gen_md5_finish (ctx, hash);
2893 dump_hash (a2buf, hash);
2895 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
/* Note that the hex text of A1BUF and A2BUF (MD5_HASHLEN * 2
   characters each, without the terminating zero) is hashed here, not
   the raw hash bytes. */
2897 gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
2898 gen_md5_update ((unsigned char *)":", 1, ctx);
2899 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2900 gen_md5_update ((unsigned char *)":", 1, ctx);
2901 gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
2902 gen_md5_finish (ctx, hash);
2903 dump_hash (response_digest, hash);
/* The allocation size is the sum of the lengths of everything that
   sprintf and the opaque suffix write into RES; OPAQUE contributes
   only when it was present in the challenge. */
2905 res = xmalloc (strlen (user)
2910 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2911 + (opaque ? strlen (opaque) : 0)
2913 sprintf (res, "Digest \
2914 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2915 user, realm, nonce, path, response_digest);
/* P points at the end of what sprintf wrote, so the opaque parameter
   is appended directly after the fixed part of the header. */
2918 char *p = res + strlen (res);
2919 strcat (p, ", opaque=\"");
2926 #endif /* ENABLE_DIGEST */
2928 /* Computing the size of a string literal must take into account that
2929 the value returned by sizeof includes the terminating \0.  STRSIZE
   therefore yields the literal's length without that terminator. */
2930 #define STRSIZE(literal) (sizeof (literal) - 1)
2932 /* Whether chars in [b, e) begin with the literal string provided as
2933 first argument and are followed by whitespace or by the end of the
2934 range (E itself; no terminating \0 is needed). The comparison is
   case-insensitive.  B and E are each expanded more than once, and B
   appears unparenthesized, so pass plain pointer variables. */
2935 #define STARTS(literal, b, e) \
2936 ((e) - (b) >= STRSIZE (literal) \
2937 && 0 == strncasecmp (b, literal, STRSIZE (literal)) \
2938 && ((e) - (b) == STRSIZE (literal) \
2939 || ISSPACE (b[STRSIZE (literal)])))
/* Return whether the challenge text in [HDRBEG, HDREND) starts with
   the name of an authentication scheme handled in this file: `Basic',
   `Digest' (tested only when ENABLE_DIGEST is defined) or `NTLM'.
   Matching follows STARTS: case-insensitive, and the name must be
   followed by whitespace or end the range.
   NOTE(review): the NTLM test is probably build-conditional as well;
   its guard is not visible in this excerpt -- confirm. */
2942 known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
2944 return STARTS ("Basic", hdrbeg, hdrend)
2945 #ifdef ENABLE_DIGEST
2946 || STARTS ("Digest", hdrbeg, hdrend)
2949 || STARTS ("NTLM", hdrbeg, hdrend)
2956 /* Create the HTTP authorization request header. When the
2957 `WWW-Authenticate' response header is seen, according to the
2958 authorization scheme specified in that header (`Basic', `Digest'
2959 and `NTLM' are dispatched below), produce an
2960 appropriate HTTP authorization request header.

   AU is the challenge text; the scheme is selected from its first
   character, uppercased.  USER and PASSWD are passed to every scheme.
   METHOD and PATH are used only by the Digest encoder.  FINISHED is
   handed to ntlm_output in the NTLM case; how the other cases set it
   is not visible in this excerpt. */
2962 create_authorization_line (const char *au, const char *user,
2963 const char *passwd, const char *method,
2964 const char *path, bool *finished)
2966 /* We are called only with known schemes, so we can dispatch on the
2968 switch (TOUPPER (*au))
2970 case 'B': /* Basic */
2972 return basic_authentication_encode (user, passwd);
2973 #ifdef ENABLE_DIGEST
2974 case 'D': /* Digest */
2976 return digest_authentication_encode (au, user, passwd, method, path);
2979 case 'N': /* NTLM */
/* NTLM keeps its state in pconn.ntlm rather than in locals: the
   server's message is fed to ntlm_input first, and the reply comes
   from ntlm_output. */
2980 if (!ntlm_input (&pconn.ntlm, au))
2985 return ntlm_output (&pconn.ntlm, user, passwd, finished);
2988 /* We shouldn't get here -- this function should be only called
2989 with values approved by known_authentication_scheme_p. */
/* Create the shared cookie jar on first use. */
2997 if (!wget_cookie_jar)
2998 wget_cookie_jar = cookie_jar_new ();
/* Load cookies from opt.cookies_input at most once; cookies_loaded_p
   records that the load has already happened. */
2999 if (opt.cookies_input && !cookies_loaded_p)
3001 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
3002 cookies_loaded_p = true;
/* Write the jar to opt.cookies_output, but only if a jar exists. */
3009 if (wget_cookie_jar)
3010 cookie_jar_save (wget_cookie_jar, opt.cookies_output);
/* Release pconn.host through xfree_null, then delete the cookie jar
   if one exists. */
3016 xfree_null (pconn.host);
3017 if (wget_cookie_jar)
3018 cookie_jar_delete (wget_cookie_jar);
/* Unit test for parse_content_disposition.  Each table entry holds a
   header value (hdrval), the filename expected from it, and the
   expected boolean result; the last entry has no filename parameter
   and expects failure. */
3025 test_parse_content_disposition()
3033 { "filename=\"file.ext\"", "file.ext", true },
3034 { "attachment; filename=\"file.ext\"", "file.ext", true },
3035 { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
3036 { "attachment", NULL, false },
/* Every entry is parsed in turn; the assertion checks the returned
   flag against the expected result and compares the extracted
   filename with the expected one using strcmp. */
3039 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
3042 bool res = parse_content_disposition (test_array[i].hdrval, &filename);
3044 mu_assert ("test_parse_content_disposition: wrong result",
3045 res == test_array[i].result
3047 || 0 == strcmp (test_array[i].filename, filename)));
3053 #endif /* TESTING */