2 Copyright (C) 1996-2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
55 # include "http-ntlm.h"
68 extern char *version_string;
/* Yield the smaller of X and Y.  Each argument may be evaluated twice,
   so do not pass expressions with side effects.  */
71 # define MIN(x, y) ((x) > (y) ? (y) : (x))
75 static bool cookies_loaded_p;
76 static struct cookie_jar *wget_cookie_jar;
78 #define TEXTHTML_S "text/html"
79 #define TEXTXHTML_S "application/xhtml+xml"
81 /* Some status code validation macros.  Each takes a numeric HTTP
   status code X.  H_20X and H_REDIRECTED mention X more than once, so
   pass an expression without side effects.  */
/* True for any status code in the range [200, 300).  */
82 #define H_20X(x) (((x) >= 200) && ((x) < 300))
/* True only for HTTP_STATUS_PARTIAL_CONTENTS (206).  */
83 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
/* True for the redirections 301, 302, 303 and 307.  Note that 300
   (multiple choices) and 304 (not modified) are not included.  */
84 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
85 || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
86 || (x) == HTTP_STATUS_SEE_OTHER \
87 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
89 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
91 #define HTTP_STATUS_OK 200
92 #define HTTP_STATUS_CREATED 201
93 #define HTTP_STATUS_ACCEPTED 202
94 #define HTTP_STATUS_NO_CONTENT 204
95 #define HTTP_STATUS_PARTIAL_CONTENTS 206
97 /* Redirection 3xx. */
98 #define HTTP_STATUS_MULTIPLE_CHOICES 300
99 #define HTTP_STATUS_MOVED_PERMANENTLY 301
100 #define HTTP_STATUS_MOVED_TEMPORARILY 302
101 #define HTTP_STATUS_SEE_OTHER 303 /* from HTTP/1.1 */
102 #define HTTP_STATUS_NOT_MODIFIED 304
103 #define HTTP_STATUS_TEMPORARY_REDIRECT 307 /* from HTTP/1.1 */
105 /* Client error 4xx. */
106 #define HTTP_STATUS_BAD_REQUEST 400
107 #define HTTP_STATUS_UNAUTHORIZED 401
108 #define HTTP_STATUS_FORBIDDEN 403
109 #define HTTP_STATUS_NOT_FOUND 404
110 #define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
112 /* Server errors 5xx. */
113 #define HTTP_STATUS_INTERNAL 500
114 #define HTTP_STATUS_NOT_IMPLEMENTED 501
115 #define HTTP_STATUS_BAD_GATEWAY 502
116 #define HTTP_STATUS_UNAVAILABLE 503
119 rel_none, rel_name, rel_value, rel_both
126 struct request_header {
128 enum rp release_policy;
130 int hcount, hcapacity;
133 /* Create a new, empty request. At least request_set_method must be
134 called before the request can be used. */
136 static struct request *
139 struct request *req = xnew0 (struct request);
141 req->headers = xnew_array (struct request_header, req->hcapacity);
145 /* Set the request's method and its arguments. METH should be a
146 literal string (or it should outlive the request) because it will
147 not be freed. ARG will be freed by request_free. */
150 request_set_method (struct request *req, const char *meth, char *arg)
156 /* Return the method string passed with the last call to
157 request_set_method. */
160 request_method (const struct request *req)
165 /* Free one header according to the release policy specified with
166 request_set_header. */
169 release_header (struct request_header *hdr)
171 switch (hdr->release_policy)
188 /* Set the request header named NAME to VALUE. Specifically, this means that
189 a "NAME: VALUE\r\n" header line will be used in the request. If a
190 header with the same name previously existed in the request, its
191 value will be replaced by this one. A NULL value means do nothing.
193 RELEASE_POLICY determines whether NAME and VALUE should be released
194 (freed) with request_free. Allowed values are:
196 - rel_none - don't free NAME or VALUE
197 - rel_name - free NAME when done
198 - rel_value - free VALUE when done
199 - rel_both - free both NAME and VALUE when done
201 Setting release policy is useful when arguments come from different
202 sources. For example:
204 // Don't free literal strings!
205 request_set_header (req, "Pragma", "no-cache", rel_none);
207 // Don't free a global variable, we'll need it later.
208 request_set_header (req, "Referer", opt.referer, rel_none);
210 // Value freshly allocated, free it when done.
211 request_set_header (req, "Range",
212 aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
217 request_set_header (struct request *req, char *name, char *value,
218 enum rp release_policy)
220 struct request_header *hdr;
225 /* A NULL value is a no-op; if freeing the name is requested,
226 free it now to avoid leaks. */
227 if (release_policy == rel_name || release_policy == rel_both)
232 for (i = 0; i < req->hcount; i++)
234 hdr = &req->headers[i];
235 if (0 == strcasecmp (name, hdr->name))
237 /* Replace existing header. */
238 release_header (hdr);
241 hdr->release_policy = release_policy;
246 /* Install new header. */
248 if (req->hcount >= req->hcapacity)
250 req->hcapacity <<= 1;
251 req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
253 hdr = &req->headers[req->hcount++];
256 hdr->release_policy = release_policy;
259 /* Like request_set_header, but sets the whole header line, as
260 provided by the user using the `--header' option. For example,
261 request_set_user_header (req, "Foo: bar") works just like
262 request_set_header (req, "Foo", "bar"). */
265 request_set_user_header (struct request *req, const char *header)
268 const char *p = strchr (header, ':');
271 BOUNDED_TO_ALLOCA (header, p, name);
275 request_set_header (req, xstrdup (name), (char *) p, rel_name);
278 /* Remove the header with specified name from REQ. Returns true if
279 the header was actually removed, false otherwise. */
282 request_remove_header (struct request *req, char *name)
285 for (i = 0; i < req->hcount; i++)
287 struct request_header *hdr = &req->headers[i];
288 if (0 == strcasecmp (name, hdr->name))
290 release_header (hdr);
291 /* Move the remaining headers by one. */
292 if (i < req->hcount - 1)
293 memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
301 #define APPEND(p, str) do { \
302 int A_len = strlen (str); \
303 memcpy (p, str, A_len); \
307 /* Construct the request and write it to FD using fd_write. */
310 request_send (const struct request *req, int fd)
312 char *request_string, *p;
313 int i, size, write_error;
315 /* Count the request size. */
318 /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
319 size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
321 for (i = 0; i < req->hcount; i++)
323 struct request_header *hdr = &req->headers[i];
324 /* NAME ": " VALUE "\r\n" */
325 size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
331 p = request_string = alloca_array (char, size);
333 /* Generate the request. */
335 APPEND (p, req->method); *p++ = ' ';
336 APPEND (p, req->arg); *p++ = ' ';
337 memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
339 for (i = 0; i < req->hcount; i++)
341 struct request_header *hdr = &req->headers[i];
342 APPEND (p, hdr->name);
343 *p++ = ':', *p++ = ' ';
344 APPEND (p, hdr->value);
345 *p++ = '\r', *p++ = '\n';
348 *p++ = '\r', *p++ = '\n', *p++ = '\0';
349 assert (p - request_string == size);
353 DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
355 /* Send the request to the server. */
357 write_error = fd_write (fd, request_string, size - 1, -1);
359 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
364 /* Release the resources used by REQ. */
367 request_free (struct request *req)
370 xfree_null (req->arg);
371 for (i = 0; i < req->hcount; i++)
372 release_header (&req->headers[i]);
373 xfree_null (req->headers);
377 /* Send the contents of FILE_NAME to SOCK. Make sure that exactly
378 PROMISED_SIZE bytes are sent over the wire -- if the file is
379 longer, read only that much; if the file is shorter, report an error. */
382 post_file (int sock, const char *file_name, wgint promised_size)
384 static char chunk[8192];
389 DEBUGP (("[writing POST file %s ... ", file_name));
391 fp = fopen (file_name, "rb");
394 while (!feof (fp) && written < promised_size)
397 int length = fread (chunk, 1, sizeof (chunk), fp);
400 towrite = MIN (promised_size - written, length);
401 write_error = fd_write (sock, chunk, towrite, -1);
411 /* If we've written less than was promised, report a (probably
412 nonsensical) error rather than break the promise. */
413 if (written < promised_size)
419 assert (written == promised_size);
420 DEBUGP (("done]\n"));
424 /* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
425 If so, return the pointer to the position after the line, otherwise
426 return NULL. This is used as callback to fd_read_hunk. The data
427 between START and PEEKED has been read and cannot be "unread"; the
428 data after PEEKED has only been peeked. */
431 response_head_terminator (const char *start, const char *peeked, int peeklen)
435 /* If at first peek, verify whether HUNK starts with "HTTP". If
436 not, this is an HTTP/0.9 response and we must bail out without
438 if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
441 /* Look for "\n[\r]\n", and return the following position if found.
442 Start two chars before the current to cover the possibility that
443 part of the terminator (e.g. "\n\r") arrived in the previous
445 p = peeked - start < 2 ? start : peeked - 2;
446 end = peeked + peeklen;
448 /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
449 for (; p < end - 2; p++)
452 if (p[1] == '\r' && p[2] == '\n')
454 else if (p[1] == '\n')
457 /* p==end-2: check for \n\n directly preceding END. */
458 if (p[0] == '\n' && p[1] == '\n')
464 /* The maximum size of a single HTTP response we care to read. Rather
465 than being a limit of the reader implementation, this limit
466 prevents Wget from slurping all available memory upon encountering
467 malicious or buggy server output, thus protecting the user. Define
468 it to 0 to remove the limit. */
470 #define HTTP_RESPONSE_MAX_SIZE 65536
472 /* Read the HTTP response head from FD and return it. The error
473 conditions are the same as with fd_read_hunk.
475 To support HTTP/0.9 responses, this function tries to make sure
476 that the data begins with "HTTP". If this is not the case, no data
477 is read and an empty response head is returned, so that the remaining
478 data can be treated as body. */
481 read_http_response_head (int fd)
483 return fd_read_hunk (fd, response_head_terminator, 512,
484 HTTP_RESPONSE_MAX_SIZE);
488 /* The response data. */
491 /* The array of pointers that indicate where each header starts.
492 For example, given this HTTP response:
499 The headers are located like this:
501 "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
503 headers[0] headers[1] headers[2] headers[3]
505 I.e. headers[0] points to the beginning of the response,
506 headers[1] points to the end of the first header and the
507 beginning of the second one, etc. */
509 const char **headers;
512 /* Create a new response object from the text of the HTTP response,
513 available in HEAD. That text is automatically split into
514 constituent header lines for fast retrieval using
517 static struct response *
518 resp_new (const char *head)
523 struct response *resp = xnew0 (struct response);
528 /* Empty head means that we're dealing with a headerless
529 (HTTP/0.9) response. In that case, don't set HEADERS at
534 /* Split HEAD into header lines, so that resp_header_* functions
535 don't need to do this over and over again. */
541 DO_REALLOC (resp->headers, size, count + 1, const char *);
542 resp->headers[count++] = hdr;
544 /* Break upon encountering an empty line. */
545 if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
548 /* Find the end of HDR, including continuations. */
551 const char *end = strchr (hdr, '\n');
557 while (*hdr == ' ' || *hdr == '\t');
559 DO_REALLOC (resp->headers, size, count + 1, const char *);
560 resp->headers[count] = NULL;
565 /* Locate the header named NAME in the response data, starting with
566 position START. This allows the code to loop through the response
567 data, filtering for all headers of a given name. Returns the
568 found position, or -1 for failure. The code that uses this
569 function typically looks like this:
571 for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
572 ... do something with header ...
574 If you only care about one header, use resp_header_get instead of
578 resp_header_locate (const struct response *resp, const char *name, int start,
579 const char **begptr, const char **endptr)
582 const char **headers = resp->headers;
585 if (!headers || !headers[1])
588 name_len = strlen (name);
594 for (; headers[i + 1]; i++)
596 const char *b = headers[i];
597 const char *e = headers[i + 1];
599 && b[name_len] == ':'
600 && 0 == strncasecmp (b, name, name_len))
603 while (b < e && ISSPACE (*b))
605 while (b < e && ISSPACE (e[-1]))
615 /* Find and retrieve the header named NAME in the response data. If
616 found, set *BEGPTR to its starting, and *ENDPTR to its ending
617 position, and return true. Otherwise return false.
619 This function is used as a building block for resp_header_copy
620 and resp_header_strdup. */
623 resp_header_get (const struct response *resp, const char *name,
624 const char **begptr, const char **endptr)
626 int pos = resp_header_locate (resp, name, 0, begptr, endptr);
630 /* Copy the response header named NAME to buffer BUF, no longer than
631 BUFSIZE (BUFSIZE includes the terminating 0). If the header
632 exists, true is returned, false otherwise. If there should be no
633 limit on the size of the header, use resp_header_strdup instead.
635 If BUFSIZE is 0, no data is copied, but the boolean indication of
636 whether the header is present is still returned. */
639 resp_header_copy (const struct response *resp, const char *name,
640 char *buf, int bufsize)
643 if (!resp_header_get (resp, name, &b, &e))
647 int len = MIN (e - b, bufsize - 1);
648 memcpy (buf, b, len);
654 /* Return the value of header named NAME in RESP, allocated with
655 malloc. If such a header does not exist in RESP, return NULL. */
658 resp_header_strdup (const struct response *resp, const char *name)
661 if (!resp_header_get (resp, name, &b, &e))
663 return strdupdelim (b, e);
666 /* Parse the HTTP status line, which is of format:
668 HTTP-Version SP Status-Code SP Reason-Phrase
670 The function returns the status-code, or -1 if the status line
671 appears malformed. The pointer to "reason-phrase" message is
672 returned in *MESSAGE. */
675 resp_status (const struct response *resp, char **message)
682 /* For a HTTP/0.9 response, assume status 200. */
684 *message = xstrdup (_("No headers, assuming HTTP/0.9"));
688 p = resp->headers[0];
689 end = resp->headers[1];
695 if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
699 /* Match the HTTP version. This is optional because Gnutella
700 servers have been reported to not specify HTTP version. */
701 if (p < end && *p == '/')
704 while (p < end && ISDIGIT (*p))
706 if (p < end && *p == '.')
708 while (p < end && ISDIGIT (*p))
712 while (p < end && ISSPACE (*p))
714 if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
717 status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
722 while (p < end && ISSPACE (*p))
724 while (p < end && ISSPACE (end[-1]))
726 *message = strdupdelim (p, end);
732 /* Release the resources used by RESP. */
735 resp_free (struct response *resp)
737 xfree_null (resp->headers);
741 /* Print the server response, line by line, omitting the trailing CRLF
742 from individual header lines, and prefixed with PREFIX. */
745 print_server_response (const struct response *resp, const char *prefix)
750 for (i = 0; resp->headers[i + 1]; i++)
752 const char *b = resp->headers[i];
753 const char *e = resp->headers[i + 1];
755 if (b < e && e[-1] == '\n')
757 if (b < e && e[-1] == '\r')
759 /* This is safe even on printfs with broken handling of "%.<n>s"
760 because resp->headers ends with \0. */
761 logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
765 /* Parse the `Content-Range' header and extract the information it
766 contains. Returns true if successful, false otherwise. */
768 parse_content_range (const char *hdr, wgint *first_byte_ptr,
769 wgint *last_byte_ptr, wgint *entity_length_ptr)
773 /* Ancient versions of Netscape proxy server, presumably predating
774 rfc2068, sent out `Content-Range' without the "bytes"
776 if (0 == strncasecmp (hdr, "bytes", 5))
779 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
783 while (ISSPACE (*hdr))
790 for (num = 0; ISDIGIT (*hdr); hdr++)
791 num = 10 * num + (*hdr - '0');
792 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
794 *first_byte_ptr = num;
796 for (num = 0; ISDIGIT (*hdr); hdr++)
797 num = 10 * num + (*hdr - '0');
798 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
800 *last_byte_ptr = num;
802 for (num = 0; ISDIGIT (*hdr); hdr++)
803 num = 10 * num + (*hdr - '0');
804 *entity_length_ptr = num;
808 /* Read the body of the response, but don't store it anywhere and don't
809 display a progress gauge. This is useful for reading the bodies of
810 administrative responses to which we will soon issue another
811 request. The response is not useful to the user, but reading it
812 allows us to continue using the same connection to the server.
814 If reading fails, false is returned, true otherwise. In debug
815 mode, the body is displayed for debugging purposes. */
818 skip_short_body (int fd, wgint contlen)
821 SKIP_SIZE = 512, /* size of the download buffer */
822 SKIP_THRESHOLD = 4096 /* the largest size we read */
824 char dlbuf[SKIP_SIZE + 1];
825 dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
827 /* We shouldn't get here with unknown contlen. (This will change
828 with HTTP/1.1, which supports "chunked" transfer.) */
829 assert (contlen != -1);
831 /* If the body is too large, it makes more sense to simply close the
832 connection than to try to read the body. */
833 if (contlen > SKIP_THRESHOLD)
836 DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
840 int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
843 /* Don't normally report the error since this is an
844 optimization that should be invisible to the user. */
845 DEBUGP (("] aborting (%s).\n",
846 ret < 0 ? fd_errstr (fd) : "EOF received"));
850 /* Safe even if %.*s bogusly expects terminating \0 because
851 we've zero-terminated dlbuf above. */
852 DEBUGP (("%.*s", ret, dlbuf));
855 DEBUGP (("] done.\n"));
859 /* Extract a parameter from the string (typically an HTTP header) at
860 **SOURCE and advance SOURCE to the next parameter. Return false
861 when there are no more parameters to extract. The name of the
862 parameter is returned in NAME, and the value in VALUE. If the
863 parameter has no value, the token's value is zeroed out.
865 For example, if *SOURCE points to the string "attachment;
866 filename=\"foo bar\"", the first call to this function will return
867 the token named "attachment" and no value, and the second call will
868 return the token named "filename" and value "foo bar". The third
869 call will return false, indicating no more valid tokens. */
872 extract_param (const char **source, param_token *name, param_token *value,
875 const char *p = *source;
877 while (ISSPACE (*p)) ++p;
881 return false; /* no error; nothing more to extract */
886 while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
888 if (name->b == name->e)
889 return false; /* empty name: error */
890 while (ISSPACE (*p)) ++p;
891 if (*p == separator || !*p) /* no value */
894 if (*p == separator) ++p;
899 return false; /* error */
901 /* *p is '=', extract value */
903 while (ISSPACE (*p)) ++p;
904 if (*p == '"') /* quoted */
907 while (*p && *p != '"') ++p;
911 /* Currently at closing quote; find the end of param. */
912 while (ISSPACE (*p)) ++p;
913 while (*p && *p != separator) ++p;
917 /* garbage after closed quote, e.g. foo="bar"baz */
923 while (*p && *p != separator) ++p;
925 while (value->e != value->b && ISSPACE (value->e[-1]))
927 if (*p == separator) ++p;
/* Yield the larger of P and Q.  Each argument may be evaluated twice,
   so do not pass expressions with side effects.  */
934 #define MAX(p, q) ((p) > (q) ? (p) : (q))
936 /* Parse the contents of the `Content-Disposition' header, extracting
937 the information useful to Wget. Content-Disposition is a header
938 borrowed from MIME; when used in HTTP, it typically serves for
939 specifying the desired file name of the resource. For example:
941 Content-Disposition: attachment; filename="flora.jpg"
943 Wget will skip the tokens it doesn't care about, such as
944 "attachment" in the previous example; it will also skip other
945 unrecognized params. If the header is syntactically correct and
946 contains a file name, a copy of the file name is stored in
947 *filename and true is returned. Otherwise, the function returns
950 The file name is stripped of directory components and must not be
954 parse_content_disposition (const char *hdr, char **filename)
956 param_token name, value;
957 while (extract_param (&hdr, &name, &value, ';'))
958 if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
960 /* Make the file name begin at the last slash or backslash. */
961 const char *last_slash = memrchr (value.b, '/', value.e - value.b);
962 const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
963 if (last_slash && last_bs)
964 value.b = 1 + MAX (last_slash, last_bs);
965 else if (last_slash || last_bs)
966 value.b = 1 + (last_slash ? last_slash : last_bs);
967 if (value.b == value.e)
969 /* Start with the directory prefix, if specified. */
972 int prefix_length = strlen (opt.dir_prefix);
973 bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
978 total_length = prefix_length + (value.e - value.b);
979 *filename = xmalloc (total_length + 1);
980 strcpy (*filename, opt.dir_prefix);
982 (*filename)[prefix_length - 1] = '/';
983 memcpy (*filename + prefix_length, value.b, (value.e - value.b));
984 (*filename)[total_length] = '\0';
987 *filename = strdupdelim (value.b, value.e);
993 /* Persistent connections. Currently, we cache the most recently used
994 connection as persistent, provided that the HTTP server agrees to
995 make it such. The persistence data is stored in the variables
996 below. Ideally, it should be possible to cache an arbitrary fixed
997 number of these connections. */
999 /* Whether a persistent connection is active. */
1000 static bool pconn_active;
1003 /* The socket of the connection. */
1006 /* Host and port of the currently active persistent connection. */
1010 /* Whether an SSL handshake has occurred on this connection. */
1013 /* Whether the connection was authorized. This is only done by
1014 NTLM, which authorizes *connections* rather than individual
1015 requests. (That practice is peculiar for HTTP, but it is a
1016 useful optimization.) */
1020 /* NTLM data of the current connection. */
1021 struct ntlmdata ntlm;
1025 /* Mark the persistent connection as invalid and free the resources it
1026 uses. This is used by the CLOSE_* macros after they forcefully
1027 close a registered persistent connection. */
1030 invalidate_persistent (void)
1032 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
1033 pconn_active = false;
1034 fd_close (pconn.socket);
1039 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
1040 persistent. This will enable someone to use the same connection
1041 later. In the context of HTTP, this must be called only AFTER the
1042 response has been received and the server has promised that the
1043 connection will remain alive.
1045 If a previous connection was persistent, it is closed. */
1048 register_persistent (const char *host, int port, int fd, bool ssl)
1052 if (pconn.socket == fd)
1054 /* The connection FD is already registered. */
1059 /* The old persistent connection is still active; close it
1060 first. This situation arises whenever a persistent
1061 connection exists, but we then connect to a different
1062 host, and try to register a persistent connection to that
1064 invalidate_persistent ();
1068 pconn_active = true;
1070 pconn.host = xstrdup (host);
1073 pconn.authorized = false;
1075 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
1078 /* Return true if a persistent connection is available for connecting
1082 persistent_available_p (const char *host, int port, bool ssl,
1083 bool *host_lookup_failed)
1085 /* First, check whether a persistent connection is active at all. */
1089 /* If we want SSL and the last connection wasn't or vice versa,
1090 don't use it. Checking for host and port is not enough because
1091 HTTP and HTTPS can apparently coexist on the same port. */
1092 if (ssl != pconn.ssl)
1095 /* If we're not connecting to the same port, we're not interested. */
1096 if (port != pconn.port)
1099 /* If the host is the same, we're in business. If not, there is
1100 still hope -- read below. */
1101 if (0 != strcasecmp (host, pconn.host))
1103 /* Check if pconn.socket is talking to HOST under another name.
1104 This happens often when both sites are virtual hosts
1105 distinguished only by name and served by the same network
1106 interface, and hence the same web server (possibly set up by
1107 the ISP and serving many different web sites). This
1108 admittedly unconventional optimization does not contradict
1109 HTTP and works well with popular server software. */
1113 struct address_list *al;
1116 /* Don't try to talk to two different SSL sites over the same
1117 secure connection! (Besides, it's not clear that
1118 name-based virtual hosting is even possible with SSL.) */
1121 /* If pconn.socket's peer is one of the IP addresses HOST
1122 resolves to, pconn.socket is for all intents and purposes
1123 already talking to HOST. */
1125 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
1127 /* Can't get the peer's address -- something must be very
1128 wrong with the connection. */
1129 invalidate_persistent ();
1132 al = lookup_host (host, 0);
1135 *host_lookup_failed = true;
1139 found = address_list_contains (al, &ip);
1140 address_list_release (al);
1145 /* The persistent connection's peer address was found among the
1146 addresses HOST resolved to; therefore, pconn.socket is in fact
1147 already talking to HOST -- no need to reconnect. */
1150 /* Finally, check whether the connection is still open. This is
1151 important because most servers implement liberal (short) timeout
1152 on persistent connections. Wget can of course always reconnect
1153 if the connection doesn't work out, but it's nicer to know in
1154 advance. This test is a logical followup of the first test, but
1155 is "expensive" and therefore placed at the end of the list.
1157 (Current implementation of test_socket_open has a nice side
1158 effect that it treats sockets with pending data as "closed".
1159 This is exactly what we want: if a broken server sends message
1160 body in response to HEAD, or if it sends more than content-length
1161 data, we won't reuse the corrupted connection.) */
1163 if (!test_socket_open (pconn.socket))
1165 /* Oops, the socket is no longer open. Now that we know that,
1166 let's invalidate the persistent connection before returning
1168 invalidate_persistent ();
1175 /* The idea behind these two CLOSE macros is to distinguish between
1176 two cases: one when the job we've been doing is finished, and we
1177 want to close the connection and leave, and two when something is
1178 seriously wrong and we're closing the connection as part of
1181 In case of keep_alive, CLOSE_FINISH should leave the connection
1182 open, while CLOSE_INVALIDATE should still close it.
1184 Note that the semantics of the flag `keep_alive' is "this
1185 connection *will* be reused (the server has promised not to close
1186 the connection once we're done)", while the semantics of
1187 `pconn_active && (fd) == pconn.socket' is "we're *now* using an
1188 active, registered connection". */
1190 #define CLOSE_FINISH(fd) do { \
1193 if (pconn_active && (fd) == pconn.socket) \
1194 invalidate_persistent (); \
1203 #define CLOSE_INVALIDATE(fd) do { \
1204 if (pconn_active && (fd) == pconn.socket) \
1205 invalidate_persistent (); \
1213 wgint len; /* received length */
1214 wgint contlen; /* expected length */
1215 wgint restval; /* the restart value */
1216 int res; /* the result of last read */
1217 char *rderrmsg; /* error message from read error */
1218 char *newloc; /* new location (redirection) */
1219 char *remote_time; /* remote time-stamp string */
1220 char *error; /* textual HTTP error */
1221 int statcode; /* status code */
1222 wgint rd_size; /* amount of data read from socket */
1223 double dltime; /* time it took to download the data */
1224 const char *referer; /* value of the referer header. */
1225 char *local_file; /* local file name. */
1226 bool timestamp_checked; /* true if pre-download time-stamping checks
1227 * have already been performed */
1228 char *orig_file_name; /* name of file to compare for time-stamping
1229 * (might be != local_file if -K is set) */
1230 wgint orig_file_size; /* size of file to compare for time-stamping */
1231 time_t orig_file_tstamp; /* time-stamp of file to compare for
1236 free_hstat (struct http_stat *hs)
1238 xfree_null (hs->newloc);
1239 xfree_null (hs->remote_time);
1240 xfree_null (hs->error);
1241 xfree_null (hs->rderrmsg);
1242 xfree_null (hs->local_file);
1243 xfree_null (hs->orig_file_name);
1245 /* Guard against being called twice. */
1247 hs->remote_time = NULL;
1251 static char *create_authorization_line (const char *, const char *,
1252 const char *, const char *,
1253 const char *, bool *);
1254 static char *basic_authentication_encode (const char *, const char *);
1255 static bool known_authentication_scheme_p (const char *, const char *);
1256 static void load_cookies (void);
/* True if LINE begins with STRING_CONSTANT, compared case-insensitively,
   and the character right after the match is either an ISSPACE
   character or the terminating NUL.

   The length of STRING_CONSTANT is taken with sizeof, so it must be a
   string literal (or a char array), not a pointer.  LINE is expanded
   several times and is not parenthesized where it is subscripted, so
   pass a simple pointer expression without side effects.  */
1258 #define BEGINS_WITH(line, string_constant) \
1259 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
1260 && (ISSPACE (line[sizeof (string_constant) - 1]) \
1261 || !line[sizeof (string_constant) - 1]))
1263 #define SET_USER_AGENT(req) do { \
1264 if (!opt.useragent) \
1265 request_set_header (req, "User-Agent", \
1266 aprintf ("Wget/%s", version_string), rel_value); \
1267 else if (*opt.useragent) \
1268 request_set_header (req, "User-Agent", opt.useragent, rel_none); \
1271 /* The flags that allow clobbering the file (opening with "wb").
1272 Defined here to avoid repetition later. #### This will require
1274 #define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
1275 || opt.dirstruct || opt.output_document)
1277 /* Retrieve a document through HTTP protocol. It recognizes status
1278 code, and correctly handles redirections. It closes the network
1279 socket. If it receives an error from the functions below it, it
1280 will print it if there is enough information to do so (almost
1281 always), returning the error to the caller (i.e. http_loop).
1283 Various HTTP parameters are stored to hs.
1285 If PROXY is non-NULL, the connection will be made to the proxy
1286 server, and u->url will be requested. */
1288 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1290 struct request *req;
1293 char *user, *passwd;
1297 wgint contlen, contrange;
1304 /* Set to true when the authorization has failed permanently and should
1305 not be tried again. */
1306 bool auth_finished = false;
1308 /* Whether NTLM authentication is used for this request. */
1309 bool ntlm_seen = false;
1311 /* Whether our connection to the remote host is through SSL. */
1312 bool using_ssl = false;
/* Whether a HEAD request will be issued (as opposed to GET or
POST). */
1316 bool head_only = !!(*dt & HEAD_ONLY);
1319 struct response *resp;
/* Whether this connection will be kept alive after the HTTP request
is done. */
1327 /* Whether keep-alive should be inhibited.
1329 RFC 2068 requests that 1.0 clients not send keep-alive requests
1330 to proxies. This is because many 1.0 proxies do not interpret
1331 the Connection header and transfer it to the remote server,
1332 causing it to not close the connection and leave both the proxy
1333 and the client hanging. */
1334 bool inhibit_keep_alive =
1335 !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1337 /* Headers sent when using POST. */
1338 wgint post_data_size = 0;
1340 bool host_lookup_failed = false;
1343 if (u->scheme == SCHEME_HTTPS)
1345 /* Initialize the SSL context. After this has once been done,
1346 it becomes a no-op. */
1349 scheme_disable (SCHEME_HTTPS);
1350 logprintf (LOG_NOTQUIET,
1351 _("Disabling SSL due to encountered errors.\n"));
1352 return SSLINITFAILED;
1355 #endif /* HAVE_SSL */
1357 /* Initialize certain elements of struct http_stat. */
1361 hs->rderrmsg = NULL;
1363 hs->remote_time = NULL;
1368 /* Prepare the request to send. */
1370 req = request_new ();
1373 const char *meth = "GET";
1376 else if (opt.post_file_name || opt.post_data)
1378 /* Use the full path, i.e. one that includes the leading slash and
1379 the query string. E.g. if u->path is "foo/bar" and u->query is
1380 "param=value", full_path will be "/foo/bar?param=value". */
1383 /* When using SSL over proxy, CONNECT establishes a direct
1384 connection to the HTTPS server. Therefore use the same
1385 argument as when talking to the server directly. */
1386 && u->scheme != SCHEME_HTTPS
1389 meth_arg = xstrdup (u->url);
1391 meth_arg = url_full_path (u);
1392 request_set_method (req, meth, meth_arg);
1395 request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1396 if (*dt & SEND_NOCACHE)
1397 request_set_header (req, "Pragma", "no-cache", rel_none);
1399 request_set_header (req, "Range",
1400 aprintf ("bytes=%s-",
1401 number_to_static_string (hs->restval)),
1403 SET_USER_AGENT (req);
1404 request_set_header (req, "Accept", "*/*", rel_none);
1406 /* Find the username and password for authentication. */
1409 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1410 user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1411 passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1415 /* We have the username and the password, but haven't tried
1416 any authorization yet. Let's see if the "Basic" method
1417 works. If not, we'll come back here and construct a
1418 proper authorization method with the right challenges.
1420 If we didn't employ this kind of logic, every URL that
1421 requires authorization would have to be processed twice,
1422 which is very suboptimal and generates a bunch of false
1423 "unauthorized" errors in the server log.
1425 #### But this logic also has a serious problem when used
1426 with stronger authentications: we *first* transmit the
1427 username and the password in clear text, and *then* attempt a
1428 stronger authentication scheme. That cannot be right! We
1429 are only fortunate that almost everyone still uses the
1430 `Basic' scheme anyway.
There should be an option to prevent this from happening, for
those who use strong authentication schemes and value their
passwords. */
1435 request_set_header (req, "Authorization",
1436 basic_authentication_encode (user, passwd),
1443 char *proxy_user, *proxy_passwd;
1444 /* For normal username and password, URL components override
1445 command-line/wgetrc parameters. With proxy
1446 authentication, it's the reverse, because proxy URLs are
1447 normally the "permanent" ones, so command-line args
1448 should take precedence. */
1449 if (opt.proxy_user && opt.proxy_passwd)
1451 proxy_user = opt.proxy_user;
1452 proxy_passwd = opt.proxy_passwd;
1456 proxy_user = proxy->user;
1457 proxy_passwd = proxy->passwd;
1459 /* #### This does not appear right. Can't the proxy request,
1460 say, `Digest' authentication? */
1461 if (proxy_user && proxy_passwd)
1462 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
/* If we're using a proxy, we will be connecting to the proxy
server. */
1468 /* Proxy authorization over SSL is handled below. */
1470 if (u->scheme != SCHEME_HTTPS)
1472 request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1475 /* Generate the Host header, HOST:PORT. Take into account that:
1477 - Broken server-side software often doesn't recognize the PORT
1478 argument, so we must generate "Host: www.server.com" instead of
1479 "Host: www.server.com:80" (and likewise for https port).
1481 - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
1482 becomes ambiguous and needs to be rewritten as "Host:
1483 [3ffe:8100:200:2::2]:1234". */
1485 /* Formats arranged for hfmt[add_port][add_squares]. */
1486 static const char *hfmt[][2] = {
1487 { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
1489 int add_port = u->port != scheme_default_port (u->scheme);
1490 int add_squares = strchr (u->host, ':') != NULL;
1491 request_set_header (req, "Host",
1492 aprintf (hfmt[add_port][add_squares], u->host, u->port),
1496 if (!inhibit_keep_alive)
1497 request_set_header (req, "Connection", "Keep-Alive", rel_none);
1500 request_set_header (req, "Cookie",
1501 cookie_header (wget_cookie_jar,
1502 u->host, u->port, u->path,
1504 u->scheme == SCHEME_HTTPS
1511 if (opt.post_data || opt.post_file_name)
1513 request_set_header (req, "Content-Type",
1514 "application/x-www-form-urlencoded", rel_none);
1516 post_data_size = strlen (opt.post_data);
1519 post_data_size = file_size (opt.post_file_name);
1520 if (post_data_size == -1)
1522 logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
1523 opt.post_file_name, strerror (errno));
1527 request_set_header (req, "Content-Length",
1528 xstrdup (number_to_static_string (post_data_size)),
1532 /* Add the user headers. */
1533 if (opt.user_headers)
1536 for (i = 0; opt.user_headers[i]; i++)
1537 request_set_user_header (req, opt.user_headers[i]);
1541 /* We need to come back here when the initial attempt to retrieve
1542 without authorization header fails. (Expected to happen at least
1543 for the Digest authorization scheme.) */
1547 /* Establish the connection. */
1549 if (!inhibit_keep_alive)
1551 /* Look for a persistent connection to target host, unless a
1552 proxy is used. The exception is when SSL is in use, in which
1553 case the proxy is nothing but a passthrough to the target
1554 host, registered as a connection to the latter. */
1555 struct url *relevant = conn;
1557 if (u->scheme == SCHEME_HTTPS)
1561 if (persistent_available_p (relevant->host, relevant->port,
1563 relevant->scheme == SCHEME_HTTPS,
1567 &host_lookup_failed))
1569 sock = pconn.socket;
1570 using_ssl = pconn.ssl;
1571 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1572 escnonprint (pconn.host), pconn.port);
1573 DEBUGP (("Reusing fd %d.\n", sock));
1574 if (pconn.authorized)
/* If the connection is already authorized, the "Basic"
authorization added by code above is unnecessary and
only hurts us. */
1578 request_remove_header (req, "Authorization");
1584 /* In its current implementation, persistent_available_p will
1585 look up conn->host in some cases. If that lookup failed, we
1586 don't need to bother with connect_to_host. */
1587 if (host_lookup_failed)
1593 sock = connect_to_host (conn->host, conn->port);
1602 return (retryable_socket_connect_error (errno)
1603 ? CONERROR : CONIMPOSSIBLE);
1607 if (proxy && u->scheme == SCHEME_HTTPS)
1609 /* When requesting SSL URLs through proxies, use the
1610 CONNECT method to request passthrough. */
1611 struct request *connreq = request_new ();
1612 request_set_method (connreq, "CONNECT",
1613 aprintf ("%s:%d", u->host, u->port));
1614 SET_USER_AGENT (connreq);
1617 request_set_header (connreq, "Proxy-Authorization",
1618 proxyauth, rel_value);
1619 /* Now that PROXYAUTH is part of the CONNECT request,
1620 zero it out so we don't send proxy authorization with
1621 the regular request below. */
1624 /* Examples in rfc2817 use the Host header in CONNECT
1625 requests. I don't see how that gains anything, given
1626 that the contents of Host would be exactly the same as
1627 the contents of CONNECT. */
1629 write_error = request_send (connreq, sock);
1630 request_free (connreq);
1631 if (write_error < 0)
1633 CLOSE_INVALIDATE (sock);
1637 head = read_http_response_head (sock);
1640 logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1642 CLOSE_INVALIDATE (sock);
1651 DEBUGP (("proxy responded with: [%s]\n", head));
1653 resp = resp_new (head);
1654 statcode = resp_status (resp, &message);
1657 if (statcode != 200)
1660 logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1661 message ? escnonprint (message) : "?");
1662 xfree_null (message);
1665 xfree_null (message);
1667 /* SOCK is now *really* connected to u->host, so update CONN
1668 to reflect this. That way register_persistent will
1669 register SOCK as being connected to u->host:u->port. */
1673 if (conn->scheme == SCHEME_HTTPS)
1675 if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
1682 #endif /* HAVE_SSL */
1685 /* Send the request to server. */
1686 write_error = request_send (req, sock);
1688 if (write_error >= 0)
1692 DEBUGP (("[POST data: %s]\n", opt.post_data));
1693 write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1695 else if (opt.post_file_name && post_data_size != 0)
1696 write_error = post_file (sock, opt.post_file_name, post_data_size);
1699 if (write_error < 0)
1701 CLOSE_INVALIDATE (sock);
1705 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1706 proxy ? "Proxy" : "HTTP");
1711 head = read_http_response_head (sock);
1716 logputs (LOG_NOTQUIET, _("No data received.\n"));
1717 CLOSE_INVALIDATE (sock);
1723 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1725 CLOSE_INVALIDATE (sock);
1730 DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1732 resp = resp_new (head);
1734 /* Check for status line. */
1736 statcode = resp_status (resp, &message);
1737 if (!opt.server_response)
1738 logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1739 message ? escnonprint (message) : "");
1742 logprintf (LOG_VERBOSE, "\n");
1743 print_server_response (resp, " ");
1746 /* Determine the local filename if needed. Notice that if -O is used
1747 * hstat.local_file is set by http_loop to the argument of -O. */
1748 if (!hs->local_file)
/* Honor Content-Disposition whenever possible. */
1751 if (!opt.content_disposition
1752 || !resp_header_copy (resp, "Content-Disposition",
1753 hdrval, sizeof (hdrval))
1754 || !parse_content_disposition (hdrval, &hs->local_file))
1756 /* The Content-Disposition header is missing or broken.
1757 * Choose unique file name according to given URL. */
1758 hs->local_file = url_file_name (u);
1762 /* TODO: perform this check only once. */
1763 if (file_exists_p (hs->local_file))
1767 /* If opt.noclobber is turned on and file already exists, do not
1768 retrieve the file */
1769 logprintf (LOG_VERBOSE, _("\
1770 File `%s' already there; not retrieving.\n\n"), hs->local_file);
1771 /* If the file is there, we suppose it's retrieved OK. */
1774 /* #### Bogusness alert. */
1775 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1776 if (has_html_suffix_p (hs->local_file))
1781 else if (!ALLOW_CLOBBER)
1783 char *unique = unique_name (hs->local_file, true);
1784 if (unique != hs->local_file)
1785 xfree (hs->local_file);
1786 hs->local_file = unique;
1790 /* Support timestamping */
1791 /* TODO: move this code out of gethttp. */
1792 if (opt.timestamping && !hs->timestamp_checked)
1794 size_t filename_len = strlen (hs->local_file);
1795 char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1796 bool local_dot_orig_file_exists = false;
1797 char *local_filename = NULL;
1800 if (opt.backup_converted)
1801 /* If -K is specified, we'll act on the assumption that it was specified
1802 last time these files were downloaded as well, and instead of just
1803 comparing local file X against server file X, we'll compare local
1804 file X.orig (if extant, else X) against server file X. If -K
1805 _wasn't_ specified last time, or the server contains files called
1806 *.orig, -N will be back to not operating correctly with -k. */
1808 /* Would a single s[n]printf() call be faster? --dan
1810 Definitely not. sprintf() is horribly slow. It's a
1811 different question whether the difference between the two
1812 affects a program. Usually I'd say "no", but at one
1813 point I profiled Wget, and found that a measurable and
1814 non-negligible amount of time was lost calling sprintf()
1815 in url.c. Replacing sprintf with inline calls to
strcpy() and number_to_string() made a difference.
--hniksic */
1818 memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
1819 memcpy (filename_plus_orig_suffix + filename_len,
1820 ".orig", sizeof (".orig"));
1822 /* Try to stat() the .orig file. */
1823 if (stat (filename_plus_orig_suffix, &st) == 0)
1825 local_dot_orig_file_exists = true;
1826 local_filename = filename_plus_orig_suffix;
1830 if (!local_dot_orig_file_exists)
1831 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1832 if (stat (hs->local_file, &st) == 0)
1833 local_filename = hs->local_file;
1835 if (local_filename != NULL)
1836 /* There was a local file, so we'll check later to see if the version
the server has is the same version we already have, allowing us to
skip a download. */
1840 hs->orig_file_name = xstrdup (local_filename);
1841 hs->orig_file_size = st.st_size;
1842 hs->orig_file_tstamp = st.st_mtime;
1844 /* Modification time granularity is 2 seconds for Windows, so
1845 increase local time by 1 second for later comparison. */
1846 ++hs->orig_file_tstamp;
1851 if (!opt.ignore_length
1852 && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1856 parsed = str_to_wgint (hdrval, NULL, 10);
1857 if (parsed == WGINT_MAX && errno == ERANGE)
/* Out of range.
#### If Content-Length is out of range, it most likely
1860 means that the file is larger than 2G and that we're
1861 compiled without LFS. In that case we should probably
1862 refuse to even attempt to download the file. */
1868 /* Check for keep-alive related responses. */
1869 if (!inhibit_keep_alive && contlen != -1)
1871 if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1873 else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1875 if (0 == strcasecmp (hdrval, "Keep-Alive"))
1880 /* The server has promised that it will not close the connection
1881 when we're done. This means that we can register it. */
1882 register_persistent (conn->host, conn->port, sock, using_ssl);
1884 if (statcode == HTTP_STATUS_UNAUTHORIZED)
1886 /* Authorization is required. */
1887 if (keep_alive && !head_only && skip_short_body (sock, contlen))
1888 CLOSE_FINISH (sock);
1890 CLOSE_INVALIDATE (sock);
1891 pconn.authorized = false;
1892 if (!auth_finished && (user && passwd))
1894 /* IIS sends multiple copies of WWW-Authenticate, one with
1895 the value "negotiate", and other(s) with data. Loop over
1896 all the occurrences and pick the one we recognize. */
1898 const char *wabeg, *waend;
1899 char *www_authenticate = NULL;
1901 (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1902 &wabeg, &waend)) != -1;
1904 if (known_authentication_scheme_p (wabeg, waend))
1906 BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1910 if (!www_authenticate)
1911 /* If the authentication header is missing or
1912 unrecognized, there's no sense in retrying. */
1913 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1914 else if (BEGINS_WITH (www_authenticate, "Basic"))
1915 /* If the authentication scheme is "Basic", which we send
1916 by default, there's no sense in retrying either. (This
should be changed when we stop sending "Basic" data by
default.) */
1923 pth = url_full_path (u);
1924 request_set_header (req, "Authorization",
1925 create_authorization_line (www_authenticate,
1927 request_method (req),
1931 if (BEGINS_WITH (www_authenticate, "NTLM"))
1934 goto retry_with_auth;
1937 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1941 else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1943 /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1945 pconn.authorized = true;
1949 hs->statcode = statcode;
1951 hs->error = xstrdup (_("Malformed status line"));
1953 hs->error = xstrdup (_("(no description)"));
1955 hs->error = xstrdup (message);
1956 xfree_null (message);
1958 type = resp_header_strdup (resp, "Content-Type");
1961 char *tmp = strchr (type, ';');
1964 while (tmp > type && ISSPACE (tmp[-1]))
1969 hs->newloc = resp_header_strdup (resp, "Location");
1970 hs->remote_time = resp_header_strdup (resp, "Last-Modified");
1972 /* Handle (possibly multiple instances of) the Set-Cookie header. */
1976 const char *scbeg, *scend;
1977 /* The jar should have been created by now. */
1978 assert (wget_cookie_jar != NULL);
1980 (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1981 &scbeg, &scend)) != -1;
1984 char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1985 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
1986 u->path, set_cookie);
1990 if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1992 wgint first_byte_pos, last_byte_pos, entity_length;
1993 if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1995 contrange = first_byte_pos;
1999 /* 20x responses are counted among successful by default. */
2000 if (H_20X (statcode))
2003 /* Return if redirected. */
2004 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
2006 /* RFC2068 says that in case of the 300 (multiple choices)
2007 response, the server can output a preferred URL through
2008 `Location' header; otherwise, the request should be treated
2009 like GET. So, if the location is set, it will be a
2010 redirection; otherwise, just proceed normally. */
2011 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
2015 logprintf (LOG_VERBOSE,
2016 _("Location: %s%s\n"),
2017 hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
2018 hs->newloc ? _(" [following]") : "");
2019 if (keep_alive && !head_only && skip_short_body (sock, contlen))
2020 CLOSE_FINISH (sock);
2022 CLOSE_INVALIDATE (sock);
2028 /* If content-type is not given, assume text/html. This is because
of the multitude of broken CGI's that "forget" to generate the
content-type. */
2032 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
2033 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
2038 if (opt.html_extension && (*dt & TEXTHTML))
2039 /* -E / --html-extension / html_extension = on was specified, and this is a
2040 text/html file. If some case-insensitive variation on ".htm[l]" isn't
2041 already the file's suffix, tack on ".html". */
2043 char *last_period_in_local_filename = strrchr (hs->local_file, '.');
2045 if (last_period_in_local_filename == NULL
2046 || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
2047 || 0 == strcasecmp (last_period_in_local_filename, ".html")))
2049 int local_filename_len = strlen (hs->local_file);
2050 /* Resize the local file, allowing for ".html" preceded by
2051 optional ".NUMBER". */
2052 hs->local_file = xrealloc (hs->local_file,
2053 local_filename_len + 24 + sizeof (".html"));
2054 strcpy(hs->local_file + local_filename_len, ".html");
2055 /* If clobbering is not allowed and the file, as named,
2056 exists, tack on ".NUMBER.html" instead. */
2057 if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
2061 sprintf (hs->local_file + local_filename_len,
2062 ".%d.html", ext_num++);
2063 while (file_exists_p (hs->local_file));
2065 *dt |= ADDED_HTML_EXTENSION;
2069 if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
2071 /* If `-c' is in use and the file has been fully downloaded (or
2072 the remote file has shrunk), Wget effectively requests bytes
after the end of file and the server responds with 416. */
2074 logputs (LOG_VERBOSE, _("\
2075 \n The file is already fully retrieved; nothing to do.\n\n"));
2076 /* In case the caller inspects. */
2079 /* Mark as successfully retrieved. */
2082 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
2083 might be more bytes in the body. */
2084 return RETRUNNEEDED;
2086 if ((contrange != 0 && contrange != hs->restval)
2087 || (H_PARTIAL (statcode) && !contrange))
/* The Range request was somehow misunderstood by the server.
Bail out. */
2092 CLOSE_INVALIDATE (sock);
2095 hs->contlen = contlen + contrange;
2101 /* No need to print this output if the body won't be
2102 downloaded at all, or if the original server response is
2104 logputs (LOG_VERBOSE, _("Length: "));
2107 logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
2108 if (contlen + contrange >= 1024)
2109 logprintf (LOG_VERBOSE, " (%s)",
2110 human_readable (contlen + contrange));
2113 if (contlen >= 1024)
2114 logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
2115 number_to_static_string (contlen),
2116 human_readable (contlen));
2118 logprintf (LOG_VERBOSE, _(", %s remaining"),
2119 number_to_static_string (contlen));
2123 logputs (LOG_VERBOSE,
2124 opt.ignore_length ? _("ignored") : _("unspecified"));
2126 logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
2128 logputs (LOG_VERBOSE, "\n");
2132 type = NULL; /* We don't need it any more. */
2134 /* Return if we have no intention of further downloading. */
2135 if (!(*dt & RETROKF) || head_only)
2137 /* In case the caller cares to look... */
2142 /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
2143 servers not to send body in response to a HEAD request, and
2144 those that do will likely be caught by test_socket_open.
2145 If not, they can be worked around using
2146 `--no-http-keep-alive'. */
2147 CLOSE_FINISH (sock);
2148 else if (keep_alive && skip_short_body (sock, contlen))
2149 /* Successfully skipped the body; also keep using the socket. */
2150 CLOSE_FINISH (sock);
2152 CLOSE_INVALIDATE (sock);
2153 return RETRFINISHED;
2156 /* Open the local file. */
2159 mkalldirs (hs->local_file);
2161 rotate_backups (hs->local_file);
2163 fp = fopen (hs->local_file, "ab");
2164 else if (ALLOW_CLOBBER)
2165 fp = fopen (hs->local_file, "wb");
2168 fp = fopen_excl (hs->local_file, true);
2169 if (!fp && errno == EEXIST)
2171 /* We cannot just invent a new name and use it (which is
2172 what functions like unique_create typically do)
2173 because we told the user we'd use this name.
2174 Instead, return and retry the download. */
2175 logprintf (LOG_NOTQUIET,
2176 _("%s has sprung into existence.\n"),
2178 CLOSE_INVALIDATE (sock);
2179 return FOPEN_EXCL_ERR;
2184 logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
2185 CLOSE_INVALIDATE (sock);
2192 /* Print fetch message, if opt.verbose. */
2195 logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
2196 HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
2199 /* This confuses the timestamping code that checks for file size.
2200 #### The timestamping code should be smarter about file size. */
2201 if (opt.save_headers && hs->restval == 0)
2202 fwrite (head, 1, strlen (head), fp);
2204 /* Now we no longer need to store the response header. */
2207 /* Download the request body. */
2210 /* If content-length is present, read that much; otherwise, read
2211 until EOF. The HTTP spec doesn't require the server to
2212 actually close the connection when it's done sending data. */
2213 flags |= rb_read_exactly;
2214 if (hs->restval > 0 && contrange == 0)
2215 /* If the server ignored our range request, instruct fd_read_body
2216 to skip the first RESTVAL bytes of body. */
2217 flags |= rb_skip_startpos;
2218 hs->len = hs->restval;
2220 hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
2221 hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
2225 CLOSE_FINISH (sock);
2229 hs->rderrmsg = xstrdup (fd_errstr (sock));
2230 CLOSE_INVALIDATE (sock);
2237 return RETRFINISHED;
2240 /* The genuine HTTP loop! This is the part where the retrieval is
2241 retried, and retried, and retried, and... */
2243 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
2244 int *dt, struct url *proxy)
2247 bool got_head = false; /* used for time-stamping and filename detection */
2248 bool got_name = false;
2251 uerr_t err, ret = TRYLIMEXC;
2252 time_t tmr = -1; /* remote time-stamp */
2253 wgint local_size = 0; /* the size of the local file */
2254 struct http_stat hstat; /* HTTP status */
2257 /* Assert that no value for *LOCAL_FILE was passed. */
2258 assert (local_file == NULL || *local_file == NULL);
2260 /* Set LOCAL_FILE parameter. */
2261 if (local_file && opt.output_document)
2262 *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2264 /* Reset NEWLOC parameter. */
2267 /* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
FTP or whatever. */
2273 /* Warn on (likely bogus) wildcard usage in HTTP. */
2274 if (opt.ftp_glob && has_wildcards_p (u->path))
2275 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2277 /* Setup hstat struct. */
2279 hstat.referer = referer;
2281 if (opt.output_document)
2283 hstat.local_file = xstrdup (opt.output_document);
2287 /* Reset the counter. */
2290 /* Reset the document type. */
2296 /* Increment the pass counter. */
2298 sleep_between_retrievals (count);
2300 /* Get the current time string. */
2301 tms = time_str (time (NULL));
2303 if (opt.spider && !got_head)
2304 logprintf (LOG_VERBOSE, _("\
2305 Spider mode enabled. Check if remote file exists.\n"));
2307 /* Print fetch message, if opt.verbose. */
2310 char *hurl = url_string (u, true);
2315 sprintf (tmp, _("(try:%2d)"), count);
2316 logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
2321 logprintf (LOG_NOTQUIET, "--%s-- %s\n",
2326 ws_changetitle (hurl);
2331 /* Default document type is empty. However, if spider mode is
on or time-stamping is employed, the HEAD_ONLY command is
encoded within *dt. */
2334 if (((opt.spider || opt.timestamping) && !got_head)
2335 || (opt.always_rest && !got_name))
2340 /* Decide whether or not to restart. */
2343 && stat (hstat.local_file, &st) == 0
2344 && S_ISREG (st.st_mode))
2345 /* When -c is used, continue from on-disk size. (Can't use
2346 hstat.len even if count>1 because we don't want a failed
2347 first attempt to clobber existing data.) */
2348 hstat.restval = st.st_size;
2350 /* otherwise, continue where the previous try left off */
2351 hstat.restval = hstat.len;
/* Decide whether to send the no-cache directive. We send it in
two cases:
2357 a) we're using a proxy, and we're past our first retrieval.
2358 Some proxies are notorious for caching incomplete data, so
2359 we require a fresh get.
2360 b) caching is explicitly inhibited. */
2361 if ((proxy && count > 1) /* a */
2362 || !opt.allow_cache) /* b */
2363 *dt |= SEND_NOCACHE;
2365 *dt &= ~SEND_NOCACHE;
2367 /* Try fetching the document, or at least its head. */
2368 err = gethttp (u, &hstat, dt, proxy);
2371 tms = time_str (time (NULL));
2373 /* Get the new location (with or without the redirection). */
2375 *newloc = xstrdup (hstat.newloc);
2379 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2380 case CONERROR: case READERR: case WRITEFAILED:
2381 case RANGEERR: case FOPEN_EXCL_ERR:
2382 /* Non-fatal errors continue executing the loop, which will
2383 bring them to "while" statement at the end, to judge
2384 whether the number of tries was exceeded. */
2385 printwhat (count, opt.ntry);
2387 case FWRITEERR: case FOPENERR:
2388 /* Another fatal error. */
2389 logputs (LOG_VERBOSE, "\n");
2390 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2391 hstat.local_file, strerror (errno));
2392 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2393 case SSLINITFAILED: case CONTNOTSUPPORTED:
2394 /* Fatal errors just return from the function. */
2398 /* Another fatal error. */
2399 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2403 /* Return the new location to the caller. */
2406 logprintf (LOG_NOTQUIET,
2407 _("ERROR: Redirection (%d) without location.\n"),
2417 /* The file was already fully retrieved. */
2421 /* Deal with you later. */
2424 /* All possibilities should have been exhausted. */
2428 if (!(*dt & RETROKF))
2433 /* #### Ugly ugly ugly! */
2434 hurl = url_string (u, true);
2435 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
/* Maybe we should always keep track of broken links, not just in
spider mode. */
2441 /* #### Again: ugly ugly ugly! */
2443 hurl = url_string (u, true);
2444 nonexisting_url (hurl);
2445 logprintf (LOG_NOTQUIET, _("\
2446 Remote file does not exist -- broken link!!!\n"));
2450 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2451 tms, hstat.statcode, escnonprint (hstat.error));
2453 logputs (LOG_VERBOSE, "\n");
2459 /* Did we get the time-stamp? */
2462 bool restart_loop = false;
2464 if (opt.timestamping && !hstat.remote_time)
2466 logputs (LOG_NOTQUIET, _("\
2467 Last-modified header missing -- time-stamps turned off.\n"));
2469 else if (hstat.remote_time)
2471 /* Convert the date-string into struct tm. */
2472 tmr = http_atotm (hstat.remote_time);
2473 if (tmr == (time_t) (-1))
2474 logputs (LOG_VERBOSE, _("\
2475 Last-modified header invalid -- time-stamp ignored.\n"));
2478 /* The time-stamping section. */
2479 if (opt.timestamping)
2481 if (hstat.orig_file_name) /* Perform the following checks only
2482 if the file we're supposed to
2483 download already exists. */
2485 if (hstat.remote_time &&
2486 tmr != (time_t) (-1))
2488 /* Now time-stamping can be used validly. Time-stamping
2489 means that if the sizes of the local and remote file
2490 match, and local file is newer than the remote file,
2491 it will not be retrieved. Otherwise, the normal
2492 download procedure is resumed. */
2493 if (hstat.orig_file_tstamp >= tmr)
2495 if (hstat.contlen == -1
2496 || hstat.orig_file_size == hstat.contlen)
2498 logprintf (LOG_VERBOSE, _("\
2499 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2500 hstat.orig_file_name);
2506 logprintf (LOG_VERBOSE, _("\
2507 The sizes do not match (local %s) -- retrieving.\n"),
2508 number_to_static_string (local_size));
2512 logputs (LOG_VERBOSE,
2513 _("Remote file is newer, retrieving.\n"));
2515 logputs (LOG_VERBOSE, "\n");
2519 /* free_hstat (&hstat); */
2520 hstat.timestamp_checked = true;
2521 restart_loop = true;
2524 if (opt.always_rest)
2527 restart_loop = true;
2536 logputs (LOG_VERBOSE, _("\
2537 Remote file exists and could contain links to other resources -- retrieving.\n\n"));
2538 restart_loop = true;
2542 logprintf (LOG_VERBOSE, _("\
2543 Remote file exists but does not contain any link -- not retrieving.\n\n"));
2550 logprintf (LOG_VERBOSE, _("\
2551 Remote file exists but recursion is disabled -- not retrieving.\n\n"));
2557 got_head = true; /* no more time-stamping */
2559 count = 0; /* the retrieve count for HEAD is reset */
2565 if ((tmr != (time_t) (-1))
2566 && ((hstat.len == hstat.contlen) ||
2567 ((hstat.res == 0) && (hstat.contlen == -1))))
/* #### This code repeats in http.c and ftp.c. Move it to a
function! */
2571 const char *fl = NULL;
2572 if (opt.output_document)
2574 if (output_stream_regular)
2575 fl = opt.output_document;
2578 fl = hstat.local_file;
2582 /* End of time-stamping section. */
2584 tmrate = retr_rate (hstat.rd_size, hstat.dltime);
2585 total_download_time += hstat.dltime;
2587 if (hstat.len == hstat.contlen)
2591 logprintf (LOG_VERBOSE,
2592 _("%s (%s) - `%s' saved [%s/%s]\n\n"),
2593 tms, tmrate, hstat.local_file,
2594 number_to_static_string (hstat.len),
2595 number_to_static_string (hstat.contlen));
2596 logprintf (LOG_NONVERBOSE,
2597 "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2599 number_to_static_string (hstat.len),
2600 number_to_static_string (hstat.contlen),
2601 hstat.local_file, count);
2604 total_downloaded_bytes += hstat.len;
2606 /* Remember that we downloaded the file for later ".orig" code. */
2607 if (*dt & ADDED_HTML_EXTENSION)
2608 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2610 downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2615 else if (hstat.res == 0) /* No read error */
2617 if (hstat.contlen == -1) /* We don't know how much we were supposed
2618 to get, so assume we succeeded. */
2622 logprintf (LOG_VERBOSE,
2623 _("%s (%s) - `%s' saved [%s]\n\n"),
2624 tms, tmrate, hstat.local_file,
2625 number_to_static_string (hstat.len));
2626 logprintf (LOG_NONVERBOSE,
2627 "%s URL:%s [%s] -> \"%s\" [%d]\n",
2628 tms, u->url, number_to_static_string (hstat.len),
2629 hstat.local_file, count);
2632 total_downloaded_bytes += hstat.len;
2634 /* Remember that we downloaded the file for later ".orig" code. */
2635 if (*dt & ADDED_HTML_EXTENSION)
2636 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2638 downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2643 else if (hstat.len < hstat.contlen) /* meaning we lost the
2644 connection too soon */
2646 logprintf (LOG_VERBOSE,
2647 _("%s (%s) - Connection closed at byte %s. "),
2648 tms, tmrate, number_to_static_string (hstat.len));
2649 printwhat (count, opt.ntry);
2653 /* Getting here would mean reading more data than
2654 requested with content-length, which we never do. */
2657 else /* from now on hstat.res can only be -1 */
2659 if (hstat.contlen == -1)
2661 logprintf (LOG_VERBOSE,
2662 _("%s (%s) - Read error at byte %s (%s)."),
2663 tms, tmrate, number_to_static_string (hstat.len),
2665 printwhat (count, opt.ntry);
2668 else /* hstat.res == -1 and contlen is given */
2670 logprintf (LOG_VERBOSE,
2671 _("%s (%s) - Read error at byte %s/%s (%s). "),
2673 number_to_static_string (hstat.len),
2674 number_to_static_string (hstat.contlen),
2676 printwhat (count, opt.ntry);
2682 while (!opt.ntry || (count < opt.ntry));
2686 *local_file = xstrdup (hstat.local_file);
2687 free_hstat (&hstat);
/* Check whether the result of strptime() indicates success.
   strptime() returns the pointer to how far it got to in the string.
   The processing has been successful if the string is at `GMT' or
   `+X', or at the end of the string.

   In extended regexp parlance, the function returns true if P matches
   "^ *(GMT|[+-][0-9]|$)", false otherwise.  P being NULL (which
   strptime can return) is considered a failure and false is
   returned.  */

static bool
check_end (const char *p)
{
  /* strptime() could not parse the string at all.  */
  if (!p)
    return false;

  /* Leading whitespace before the zone designator is ignored.  */
  while (ISSPACE (*p))
    ++p;

  return (!*p
          || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
          || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])));
}
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2616 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
   as well as the time format used in the Set-Cookie header.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as I would like it to be.  Being based on
   strptime, it always allows shortened months, one-digit days, etc.,
   but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   I've investigated free and PD code for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
   distributed with phttpd, is excellent, but we cannot use it because
   it is not assigned to the FSF.  So I stuck it with strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  const char *oldlocale;
  char *saved_locale = NULL;
  int i;
  time_t ret = (time_t) -1;

  /* Solaris strptime fails to recognize English month names in
     non-English locales, which we work around by temporarily setting
     locale to C before invoking strptime.

     The string returned by setlocale() may be overwritten by a later
     setlocale() call, so keep a private copy of the current locale
     name before switching.  */
  oldlocale = setlocale (LC_TIME, NULL);
  if (oldlocale)
    saved_locale = xstrdup (oldlocale);
  setlocale (LC_TIME, "C");

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* Some versions of strptime use the existing contents of struct
         tm to recalculate the date according to format.  Zero it out
         to prevent stack garbage from influencing strptime.  */
      xzero (t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        {
          ret = timegm (&t);
          break;
        }
    }

  /* Restore the previous locale from our private copy.  */
  if (saved_locale)
    {
      setlocale (LC_TIME, saved_locale);
      xfree (saved_locale);
    }

  return ret;
}
/* Authorization support: We support three authorization schemes:

   * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
   consisting of answering to the server's challenge with the proper
   MD5 digests.

   * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
   Stenberg for libcurl.  Like digest, NTLM is based on a
   challenge-response mechanism, but unlike digest, it is non-standard
   (authenticates TCP connections rather than requests), undocumented
   and Microsoft-specific.  */
/* Create the authentication header contents for the `Basic' scheme.
   This is done by encoding the string "USER:PASS" to base64 and
   prepending the string "Basic " in front of it.

   USER and PASSWD must be non-NULL.  The result is whatever
   concat_strings returns for the pieces "Basic " and the encoded
   credentials.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  char *plain, *encoded, *header;
  size_t len = strlen (user) + 1 + strlen (passwd);

  /* Credentials have no fixed upper bound, so the scratch buffers
     live on the heap rather than on the stack.  */
  plain = xmalloc (len + 1);
  sprintf (plain, "%s:%s", user, passwd);

  encoded = xmalloc (BASE64_LENGTH (len) + 1);
  base64_encode (plain, len, encoded);

  header = concat_strings ("Basic ", encoded, (char *) 0);

  xfree (encoded);
  xfree (plain);
  return header;
}
/* Advance the pointer lvalue X past any whitespace it points at.
   X is evaluated more than once, so it must be free of side
   effects.  */
#define SKIP_WS(x) do {                         \
  while (ISSPACE (*(x)))                        \
    ++(x);                                      \
} while (0)
2829 #ifdef ENABLE_DIGEST
2830 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2831 an array of 16 bytes containing the hash keys, and BUF should be a
2832 buffer of 33 writable characters (32 for hex digits plus one for
2833 zero termination). */
2835 dump_hash (char *buf, const unsigned char *hash)
2839 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2841 *buf++ = XNUM_TO_digit (*hash >> 4);
2842 *buf++ = XNUM_TO_digit (*hash & 0xf);
2847 /* Take the line apart to find the challenge, and compose a digest
2848 authorization header. See RFC2069 section 2.1.2. */
2850 digest_authentication_encode (const char *au, const char *user,
2851 const char *passwd, const char *method,
2854 static char *realm, *opaque, *nonce;
2859 { "realm", &realm },
2860 { "opaque", &opaque },
2864 param_token name, value;
2866 realm = opaque = nonce = NULL;
2868 au += 6; /* skip over `Digest' */
2869 while (extract_param (&au, &name, &value, ','))
2872 for (i = 0; i < countof (options); i++)
2873 if (name.e - name.b == strlen (options[i].name)
2874 && 0 == strncmp (name.b, options[i].name, name.e - name.b))
2876 *options[i].variable = strdupdelim (value.b, value.e);
2880 if (!realm || !nonce || !user || !passwd || !path || !method)
2883 xfree_null (opaque);
2888 /* Calculate the digest value. */
2890 ALLOCA_MD5_CONTEXT (ctx);
2891 unsigned char hash[MD5_HASHLEN];
2892 char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2893 char response_digest[MD5_HASHLEN * 2 + 1];
2895 /* A1BUF = H(user ":" realm ":" password) */
2897 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2898 gen_md5_update ((unsigned char *)":", 1, ctx);
2899 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2900 gen_md5_update ((unsigned char *)":", 1, ctx);
2901 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2902 gen_md5_finish (ctx, hash);
2903 dump_hash (a1buf, hash);
2905 /* A2BUF = H(method ":" path) */
2907 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2908 gen_md5_update ((unsigned char *)":", 1, ctx);
2909 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2910 gen_md5_finish (ctx, hash);
2911 dump_hash (a2buf, hash);
2913 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2915 gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
2916 gen_md5_update ((unsigned char *)":", 1, ctx);
2917 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2918 gen_md5_update ((unsigned char *)":", 1, ctx);
2919 gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
2920 gen_md5_finish (ctx, hash);
2921 dump_hash (response_digest, hash);
2923 res = xmalloc (strlen (user)
2928 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2929 + (opaque ? strlen (opaque) : 0)
2931 sprintf (res, "Digest \
2932 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2933 user, realm, nonce, path, response_digest);
2936 char *p = res + strlen (res);
2937 strcat (p, ", opaque=\"");
2944 #endif /* ENABLE_DIGEST */
/* Computing the size of a string literal must take into account that
   value returned by sizeof includes the terminating \0.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* Whether chars in [b, e) begin with the literal string provided as
   first argument and are followed by whitespace or terminating \0.
   The comparison is case-insensitive.  B and E are evaluated more
   than once, so they must be free of side effects.  */
#define STARTS(literal, b, e)                             \
  ((e) - (b) >= STRSIZE (literal)                         \
   && 0 == strncasecmp ((b), literal, STRSIZE (literal))  \
   && ((e) - (b) == STRSIZE (literal)                     \
       || ISSPACE ((b)[STRSIZE (literal)])))
/* Return true if the header value in [HDRBEG, HDREND) starts with the
   name of an authentication scheme this build can answer: `Basic'
   always, `Digest' when ENABLE_DIGEST is defined, and `NTLM' when
   ENABLE_NTLM is defined.  */

static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  return STARTS ("Basic", hdrbeg, hdrend)
#ifdef ENABLE_DIGEST
    || STARTS ("Digest", hdrbeg, hdrend)
#endif
#ifdef ENABLE_NTLM
    || STARTS ("NTLM", hdrbeg, hdrend)
#endif
    ;
}
/* Create the HTTP authorization request header.  When the
   `WWW-Authenticate' response header is seen, according to the
   authorization scheme specified in that header (`Basic', and, when
   compiled in, `Digest' and `NTLM'), produce an appropriate HTTP
   authorization request header.

   AU is the challenge, beginning with the scheme name.  *FINISHED is
   set to true by the Basic and Digest branches and when the NTLM
   input cannot be processed; otherwise it is left to ntlm_output.
   Returns the header value, or NULL on failure.  */

static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, bool *finished)
{
  /* We are called only with known schemes, so we can dispatch on the
     first letter.  */
  switch (TOUPPER (*au))
    {
    case 'B':                   /* Basic */
      *finished = true;
      return basic_authentication_encode (user, passwd);
#ifdef ENABLE_DIGEST
    case 'D':                   /* Digest */
      *finished = true;
      return digest_authentication_encode (au, user, passwd, method, path);
#endif
#ifdef ENABLE_NTLM
    case 'N':                   /* NTLM */
      if (!ntlm_input (&pconn.ntlm, au))
        {
          *finished = true;
          return NULL;
        }
      return ntlm_output (&pconn.ntlm, user, passwd, finished);
#endif
    default:
      /* We shouldn't get here -- this function should be only called
         with values approved by known_authentication_scheme_p.  */
      abort ();
    }
}
3015 if (!wget_cookie_jar)
3016 wget_cookie_jar = cookie_jar_new ();
3017 if (opt.cookies_input && !cookies_loaded_p)
3019 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
3020 cookies_loaded_p = true;
3027 if (wget_cookie_jar)
3028 cookie_jar_save (wget_cookie_jar, opt.cookies_output);
3034 xfree_null (pconn.host);
3035 if (wget_cookie_jar)
3036 cookie_jar_delete (wget_cookie_jar);
#ifdef TESTING

/* Unit test for parse_content_disposition.  Each table entry gives a
   header value, the directory prefix to place in opt.dir_prefix, the
   file name expected on success, and whether parsing is expected to
   succeed.  Returns NULL when every entry passes; mu_assert handles
   the failure case.  */

const char *
test_parse_content_disposition (void)
{
  int i;
  struct {
    char *hdrval;
    char *opt_dir_prefix;
    char *filename;
    bool result;
  } test_array[] = {
    { "filename=\"file.ext\"", NULL, "file.ext", true },
    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
    { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
    { "attachment", NULL, NULL, false },
    { "attachment", "somedir", NULL, false },
  };

  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
    {
      /* Initialised so that a failed parse never leaves an
         indeterminate pointer behind.  */
      char *filename = NULL;
      bool res;

      opt.dir_prefix = test_array[i].opt_dir_prefix;
      res = parse_content_disposition (test_array[i].hdrval, &filename);

      /* The file name is only compared when parsing succeeded.  */
      mu_assert ("test_parse_content_disposition: wrong result",
                 res == test_array[i].result
                 && (res == false
                     || 0 == strcmp (test_array[i].filename, filename)));
    }

  return NULL;
}

#endif /* TESTING */