2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
34 #include <sys/types.h>
45 #if TIME_WITH_SYS_TIME
46 # include <sys/time.h>
50 # include <sys/time.h>
67 # include "gen_sslfunc.h"
75 extern char *version_string;
76 extern LARGE_INT total_downloaded_bytes;
78 extern FILE *output_stream;
79 extern int output_stream_regular;
/* Evaluate to the smaller of X and Y (X when they compare equal).
   Each argument appears more than once in the expansion, so do not
   pass expressions with side effects.  */
82 # define MIN(x, y) ((x) > (y) ? (y) : (x))
86 static int cookies_loaded_p;
87 struct cookie_jar *wget_cookie_jar;
89 #define TEXTHTML_S "text/html"
90 #define TEXTXHTML_S "application/xhtml+xml"
92 /* Some status code validation macros: */
/* H_20X: true for any status code in the range 200..299.  */
93 #define H_20X(x) (((x) >= 200) && ((x) < 300))
/* H_PARTIAL: true only for HTTP_STATUS_PARTIAL_CONTENTS (206).  */
94 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
/* H_REDIRECTED: true for 301, 302 and 307.  Other 3xx codes such as
   300 (multiple choices) and 304 (not modified) do not match.  The
   argument is evaluated up to three times.  */
95 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
96 || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
97 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
99 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
100 /* Successful 2xx. */
101 #define HTTP_STATUS_OK 200
102 #define HTTP_STATUS_CREATED 201
103 #define HTTP_STATUS_ACCEPTED 202
104 #define HTTP_STATUS_NO_CONTENT 204
105 #define HTTP_STATUS_PARTIAL_CONTENTS 206
107 /* Redirection 3xx. */
108 #define HTTP_STATUS_MULTIPLE_CHOICES 300
109 #define HTTP_STATUS_MOVED_PERMANENTLY 301
110 #define HTTP_STATUS_MOVED_TEMPORARILY 302
111 #define HTTP_STATUS_NOT_MODIFIED 304
112 #define HTTP_STATUS_TEMPORARY_REDIRECT 307
114 /* Client error 4xx. */
115 #define HTTP_STATUS_BAD_REQUEST 400
116 #define HTTP_STATUS_UNAUTHORIZED 401
117 #define HTTP_STATUS_FORBIDDEN 403
118 #define HTTP_STATUS_NOT_FOUND 404
119 #define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
121 /* Server errors 5xx. */
122 #define HTTP_STATUS_INTERNAL 500
123 #define HTTP_STATUS_NOT_IMPLEMENTED 501
124 #define HTTP_STATUS_BAD_GATEWAY 502
125 #define HTTP_STATUS_UNAVAILABLE 503
128 rel_none, rel_name, rel_value, rel_both
135 struct request_header {
137 enum rp release_policy;
139 int hcount, hcapacity;
142 /* Create a new, empty request. At least request_set_method must be
143 called before the request can be used. */
145 static struct request *
148 struct request *req = xnew0 (struct request);
150 req->headers = xnew_array (struct request_header, req->hcapacity);
154 /* Set the request's method and its arguments. METH should be a
155 literal string (or it should outlive the request) because it will
156 not be freed. ARG will be freed by request_free. */
159 request_set_method (struct request *req, const char *meth, char *arg)
165 /* Return the method string passed with the last call to
166 request_set_method. */
169 request_method (const struct request *req)
174 /* Free one header according to the release policy specified with
175 request_set_header. */
178 release_header (struct request_header *hdr)
180 switch (hdr->release_policy)
197 /* Set the request header named NAME to VALUE. Specifically, this means that
198 a "NAME: VALUE\r\n" header line will be used in the request. If a
199 header with the same name previously existed in the request, its
200 value will be replaced by this one.
202 RELEASE_POLICY determines whether NAME and VALUE should be released
203 (freed) with request_free. Allowed values are:
205 - rel_none - don't free NAME or VALUE
206 - rel_name - free NAME when done
207 - rel_value - free VALUE when done
208 - rel_both - free both NAME and VALUE when done
210 Setting release policy is useful when arguments come from different
211 sources. For example:
213 // Don't free literal strings!
214 request_set_header (req, "Pragma", "no-cache", rel_none);
216 // Don't free a global variable, we'll need it later.
217 request_set_header (req, "Referer", opt.referer, rel_none);
219 // Value freshly allocated, free it when done.
220 request_set_header (req, "Range", aprintf ("bytes=%ld-", hs->restval),
225 request_set_header (struct request *req, char *name, char *value,
226 enum rp release_policy)
228 struct request_header *hdr;
232 for (i = 0; i < req->hcount; i++)
234 hdr = &req->headers[i];
235 if (0 == strcasecmp (name, hdr->name))
237 /* Replace existing header. */
238 release_header (hdr);
241 hdr->release_policy = release_policy;
246 /* Install new header. */
248 if (req->hcount >= req->hcount)
250 req->hcapacity <<= 1;
251 req->headers = xrealloc (req->headers,
252 req->hcapacity * sizeof (struct request_header));
254 hdr = &req->headers[req->hcount++];
257 hdr->release_policy = release_policy;
260 /* Like request_set_header, but sets the whole header line, as
261 provided by the user using the `--header' option. For example,
262 request_set_user_header (req, "Foo: bar") works just like
263 request_set_header (req, "Foo", "bar"). */
266 request_set_user_header (struct request *req, const char *header)
269 const char *p = strchr (header, ':');
272 BOUNDED_TO_ALLOCA (header, p, name);
276 request_set_header (req, xstrdup (name), (char *) p, rel_name);
279 #define APPEND(p, str) do { \
280 int A_len = strlen (str); \
281 memcpy (p, str, A_len); \
285 /* Construct the request and write it to FD using fd_write. */
288 request_send (const struct request *req, int fd)
290 char *request_string, *p;
291 int i, size, write_error;
293 /* Count the request size. */
296 /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
297 size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
299 for (i = 0; i < req->hcount; i++)
301 struct request_header *hdr = &req->headers[i];
302 /* NAME ": " VALUE "\r\n" */
303 size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
309 p = request_string = alloca_array (char, size);
311 /* Generate the request. */
313 APPEND (p, req->method); *p++ = ' ';
314 APPEND (p, req->arg); *p++ = ' ';
315 memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
317 for (i = 0; i < req->hcount; i++)
319 struct request_header *hdr = &req->headers[i];
320 APPEND (p, hdr->name);
321 *p++ = ':', *p++ = ' ';
322 APPEND (p, hdr->value);
323 *p++ = '\r', *p++ = '\n';
326 *p++ = '\r', *p++ = '\n', *p++ = '\0';
327 assert (p - request_string == size);
331 DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
333 /* Send the request to the server. */
335 write_error = fd_write (fd, request_string, size - 1, -1);
337 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
342 /* Release the resources used by REQ. */
345 request_free (struct request *req)
348 xfree_null (req->arg);
349 for (i = 0; i < req->hcount; i++)
350 release_header (&req->headers[i]);
351 xfree_null (req->headers);
355 /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
356 PROMISED_SIZE bytes are sent over the wire -- if the file is
357 longer, read only that much; if the file is shorter, report an error. */
360 post_file (int sock, const char *file_name, long promised_size)
362 static char chunk[8192];
367 DEBUGP (("[writing POST file %s ... ", file_name));
369 fp = fopen (file_name, "rb");
372 while (!feof (fp) && written < promised_size)
375 int length = fread (chunk, 1, sizeof (chunk), fp);
378 towrite = MIN (promised_size - written, length);
379 write_error = fd_write (sock, chunk, towrite, -1);
389 /* If we've written less than was promised, report a (probably
390 nonsensical) error rather than break the promise. */
391 if (written < promised_size)
397 assert (written == promised_size);
398 DEBUGP (("done]\n"));
403 head_terminator (const char *hunk, int oldlen, int peeklen)
405 const char *start, *end;
407 /* If at first peek, verify whether HUNK starts with "HTTP". If
408 not, this is an HTTP/0.9 response and we must bail out without
410 if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
416 start = hunk + oldlen - 4;
417 end = hunk + oldlen + peeklen;
419 for (; start < end - 1; start++)
426 if (start[1] == '\n')
432 /* Read the HTTP response head from FD and return it. The error
433 conditions are the same as with fd_read_hunk.
435 To support HTTP/0.9 responses, this function tries to make sure
436 that the data begins with "HTTP". If this is not the case, no data
437 is read and an empty head is returned, so that the remaining
438 data can be treated as body. */
441 fd_read_http_head (int fd)
443 return fd_read_hunk (fd, head_terminator, 512);
447 /* The response data. */
450 /* The array of pointers that indicate where each header starts.
451 For example, given this HTTP response:
458 The headers are located like this:
460 "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
462 headers[0] headers[1] headers[2] headers[3]
464 I.e. headers[0] points to the beginning of the response,
465 headers[1] points to the end of the first header and the
466 beginning of the second one, etc. */
468 const char **headers;
471 /* Create a new response object from the text of the HTTP response,
472 available in HEAD. That text is automatically split into
473 constituent header lines for fast retrieval using
474 response_header_*. */
476 static struct response *
477 response_new (const char *head)
482 struct response *resp = xnew0 (struct response);
487 /* Empty head means that we're dealing with a headerless
488 (HTTP/0.9) response. In that case, don't set HEADERS at
493 /* Split HEAD into header lines, so that response_header_* functions
494 don't need to do this over and over again. */
500 DO_REALLOC (resp->headers, size, count + 1, const char *);
501 resp->headers[count++] = hdr;
503 /* Break upon encountering an empty line. */
504 if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
507 /* Find the end of HDR, including continuations. */
510 const char *end = strchr (hdr, '\n');
516 while (*hdr == ' ' || *hdr == '\t');
518 DO_REALLOC (resp->headers, size, count + 1, const char *);
519 resp->headers[count++] = NULL;
524 /* Locate the header named NAME in the response data. If found, set
525 *BEGPTR to its starting, and *ENDPTR to its ending position, and
526 return 1. Otherwise return 0.
528 This function is used as a building block for response_header_copy
529 and response_header_strdup. */
532 response_header_bounds (const struct response *resp, const char *name,
533 const char **begptr, const char **endptr)
536 const char **headers = resp->headers;
539 if (!headers || !headers[1])
542 name_len = strlen (name);
544 for (i = 1; headers[i + 1]; i++)
546 const char *b = headers[i];
547 const char *e = headers[i + 1];
549 && b[name_len] == ':'
550 && 0 == strncasecmp (b, name, name_len))
553 while (b < e && ISSPACE (*b))
555 while (b < e && ISSPACE (e[-1]))
565 /* Copy the response header named NAME to buffer BUF, no longer than
566 BUFSIZE (BUFSIZE includes the terminating 0). If the header
567 exists, 1 is returned, otherwise 0. If there should be no limit on
568 the size of the header, use response_header_strdup instead.
570 If BUFSIZE is 0, no data is copied, but the boolean indication of
571 whether the header is present is still returned. */
/* Copy the value of the response header NAME into BUF, truncating it
   to at most BUFSIZE - 1 bytes and always terminating BUF with 0.
   Return 1 if the header is present in RESP, 0 otherwise.  When
   BUFSIZE is 0, BUF is not written and only the presence of the
   header is reported.  */

static int
response_header_copy (const struct response *resp, const char *name,
                      char *buf, int bufsize)
{
  const char *b, *e;

  if (!response_header_bounds (resp, name, &b, &e))
    return 0;

  if (bufsize)
    {
      /* BUFSIZE counts the terminating 0, so only BUFSIZE - 1 bytes
         of the value fit; copying BUFSIZE bytes would leave no room
         for the terminator.  */
      int len = MIN (e - b, bufsize - 1);
      memcpy (buf, b, len);
      buf[len] = '\0';
    }
  return 1;
}
589 /* Return the value of header named NAME in RESP, allocated with
590 malloc. If such a header does not exist in RESP, return NULL. */
593 response_header_strdup (const struct response *resp, const char *name)
596 if (!response_header_bounds (resp, name, &b, &e))
598 return strdupdelim (b, e);
601 /* Parse the HTTP status line, which is of format:
603 HTTP-Version SP Status-Code SP Reason-Phrase
605 The function returns the status-code, or -1 if the status line
606 appears malformed. The pointer to "reason-phrase" message is
607 returned in *MESSAGE. */
610 response_status (const struct response *resp, char **message)
617 /* For a HTTP/0.9 response, assume status 200. */
619 *message = xstrdup (_("No headers, assuming HTTP/0.9"));
623 p = resp->headers[0];
624 end = resp->headers[1];
630 if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
634 /* Match the HTTP version. This is optional because Gnutella
635 servers have been reported to not specify HTTP version. */
636 if (p < end && *p == '/')
639 while (p < end && ISDIGIT (*p))
641 if (p < end && *p == '.')
643 while (p < end && ISDIGIT (*p))
647 while (p < end && ISSPACE (*p))
649 if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
652 status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
657 while (p < end && ISSPACE (*p))
659 while (p < end && ISSPACE (end[-1]))
661 *message = strdupdelim (p, end);
667 /* Release the resources used by RESP. */
670 response_free (struct response *resp)
672 xfree_null (resp->headers);
676 /* Print [b, e) to the log, omitting the trailing CRLF. */
679 print_server_response_1 (const char *prefix, const char *b, const char *e)
682 if (b < e && e[-1] == '\n')
684 if (b < e && e[-1] == '\r')
686 BOUNDED_TO_ALLOCA (b, e, ln);
687 logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln);
690 /* Print the server response, line by line, omitting the trailing CR
691 characters, prefixed with PREFIX. */
694 print_server_response (const struct response *resp, const char *prefix)
699 for (i = 0; resp->headers[i + 1]; i++)
700 print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]);
703 /* Parse the `Content-Range' header and extract the information it
704 contains. Returns 1 if successful, -1 otherwise. */
706 parse_content_range (const char *hdr, long *first_byte_ptr,
707 long *last_byte_ptr, long *entity_length_ptr)
711 /* Ancient versions of Netscape proxy server, presumably predating
712 rfc2068, sent out `Content-Range' without the "bytes"
714 if (!strncasecmp (hdr, "bytes", 5))
717 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
721 while (ISSPACE (*hdr))
728 for (num = 0; ISDIGIT (*hdr); hdr++)
729 num = 10 * num + (*hdr - '0');
730 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
732 *first_byte_ptr = num;
734 for (num = 0; ISDIGIT (*hdr); hdr++)
735 num = 10 * num + (*hdr - '0');
736 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
738 *last_byte_ptr = num;
740 for (num = 0; ISDIGIT (*hdr); hdr++)
741 num = 10 * num + (*hdr - '0');
742 *entity_length_ptr = num;
746 /* Read the body of the response, but don't store it anywhere and don't
747 display a progress gauge. This is useful for reading the error
748 responses whose bodies don't need to be displayed or logged, but
749 which need to be read anyway. */
752 skip_short_body (int fd, long contlen)
754 /* Skipping the body doesn't make sense if the content length is
755 unknown because, in that case, persistent connections cannot be
756 used. (#### This is not the case with HTTP/1.1 where they can
757 still be used with the magic of the "chunked" transfer!) */
760 DEBUGP (("Skipping %ld bytes of body data... ", contlen));
765 int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1);
770 DEBUGP (("done.\n"));
773 /* Persistent connections. Currently, we cache the most recently used
774 connection as persistent, provided that the HTTP server agrees to
775 make it such. The persistence data is stored in the variables
776 below. Ideally, it should be possible to cache an arbitrary fixed
777 number of these connections. */
779 /* Whether a persistent connection is active. */
780 static int pconn_active;
783 /* The socket of the connection. */
786 /* Host and port of the currently active persistent connection. */
790 /* Whether an SSL handshake has occurred on this connection. */
794 /* Mark the persistent connection as invalid and free the resources it
795 uses. This is used by the CLOSE_* macros after they forcefully
796 close a registered persistent connection. */
799 invalidate_persistent (void)
801 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
803 fd_close (pconn.socket);
808 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
809 persistent. This will enable someone to use the same connection
810 later. In the context of HTTP, this must be called only AFTER the
811 response has been received and the server has promised that the
812 connection will remain alive.
814 If a previous connection was persistent, it is closed. */
817 register_persistent (const char *host, int port, int fd, int ssl)
821 if (pconn.socket == fd)
823 /* The connection FD is already registered. */
828 /* The old persistent connection is still active; close it
829 first. This situation arises whenever a persistent
830 connection exists, but we then connect to a different
831 host, and try to register a persistent connection to that
833 invalidate_persistent ();
839 pconn.host = xstrdup (host);
843 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
846 /* Return non-zero if a persistent connection is available for
847 connecting to HOST:PORT. */
850 persistent_available_p (const char *host, int port, int ssl,
851 int *host_lookup_failed)
853 /* First, check whether a persistent connection is active at all. */
857 /* If we want SSL and the last connection wasn't or vice versa,
858 don't use it. Checking for host and port is not enough because
859 HTTP and HTTPS can apparently coexist on the same port. */
860 if (ssl != pconn.ssl)
863 /* If we're not connecting to the same port, we're not interested. */
864 if (port != pconn.port)
867 /* If the host is the same, we're in business. If not, there is
868 still hope -- read below. */
869 if (0 != strcasecmp (host, pconn.host))
871 /* If pconn.socket is already talking to HOST, we needn't
872 reconnect. This happens often when both sites are virtual
873 hosts distinguished only by name and served by the same
874 network interface, and hence the same web server (possibly
875 set up by the ISP and serving many different web sites).
876 This admittedly non-standard optimization does not contradict
877 HTTP and works well with popular server software. */
881 struct address_list *al;
884 /* Don't try to talk to two different SSL sites over the same
885 secure connection! (Besides, it's not clear if name-based
886 virtual hosting is even possible with SSL.) */
889 /* If pconn.socket's peer is one of the IP addresses HOST
890 resolves to, pconn.socket is for all intents and purposes
891 already talking to HOST. */
893 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
895 /* Can't get the peer's address -- something must be very
896 wrong with the connection. */
897 invalidate_persistent ();
900 al = lookup_host (host, 0);
903 *host_lookup_failed = 1;
907 found = address_list_contains (al, &ip);
908 address_list_release (al);
913 /* The persistent connection's peer address was found among the
914 addresses HOST resolved to; therefore, pconn.sock is in fact
915 already talking to HOST -- no need to reconnect. */
918 /* Finally, check whether the connection is still open. This is
919 important because most servers implement a liberal (short) timeout
920 on persistent connections. Wget can of course always reconnect
921 if the connection doesn't work out, but it's nicer to know in
922 advance. This test is a logical followup of the first test, but
923 is "expensive" and therefore placed at the end of the list. */
925 if (!test_socket_open (pconn.socket))
927 /* Oops, the socket is no longer open. Now that we know that,
928 let's invalidate the persistent connection before returning
930 invalidate_persistent ();
937 /* The idea behind these two CLOSE macros is to distinguish between
938 two cases: one when the job we've been doing is finished, and we
939 want to close the connection and leave, and two when something is
940 seriously wrong and we're closing the connection as part of
943 In case of keep_alive, CLOSE_FINISH should leave the connection
944 open, while CLOSE_INVALIDATE should still close it.
946 Note that the semantics of the flag `keep_alive' is "this
947 connection *will* be reused (the server has promised not to close
948 the connection once we're done)", while the semantics of
949 `pconn_active && (fd) == pconn.socket' is "we're *now* using an
950 active, registered connection". */
952 #define CLOSE_FINISH(fd) do { \
955 if (pconn_active && (fd) == pconn.socket) \
956 invalidate_persistent (); \
965 #define CLOSE_INVALIDATE(fd) do { \
966 if (pconn_active && (fd) == pconn.socket) \
967 invalidate_persistent (); \
975 long len; /* received length */
976 long contlen; /* expected length */
977 long restval; /* the restart value */
978 int res; /* the result of last read */
979 char *newloc; /* new location (redirection) */
980 char *remote_time; /* remote time-stamp string */
981 char *error; /* textual HTTP error */
982 int statcode; /* status code */
983 double dltime; /* time of the download in msecs */
984 const char *referer; /* value of the referer header. */
985 char **local_file; /* local file. */
989 free_hstat (struct http_stat *hs)
991 xfree_null (hs->newloc);
992 xfree_null (hs->remote_time);
993 xfree_null (hs->error);
995 /* Guard against being called twice. */
997 hs->remote_time = NULL;
1001 static char *create_authorization_line PARAMS ((const char *, const char *,
1002 const char *, const char *,
1004 static char *basic_authentication_encode PARAMS ((const char *, const char *));
1005 static int known_authentication_scheme_p PARAMS ((const char *));
1007 time_t http_atotm PARAMS ((const char *));
/* Nonzero when LINE starts with STRING_CONSTANT, compared without
   regard to case, and the match is followed by whitespace or by the
   end of the string.  The length is taken with sizeof, so
   STRING_CONSTANT must be a string literal (or a char array), not a
   pointer.  LINE is evaluated more than once.  */
1009 #define BEGINS_WITH(line, string_constant) \
1010 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
1011 && (ISSPACE (line[sizeof (string_constant) - 1]) \
1012 || !line[sizeof (string_constant) - 1]))
1014 /* Retrieve a document through HTTP protocol. It recognizes status
1015 code, and correctly handles redirections. It closes the network
1016 socket. If it receives an error from the functions below it, it
1017 will print it if there is enough information to do so (almost
1018 always), returning the error to the caller (i.e. http_loop).
1020 Various HTTP parameters are stored to hs.
1022 If PROXY is non-NULL, the connection will be made to the proxy
1023 server, and u->url will be requested. */
1025 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1027 struct request *req;
1030 char *user, *passwd;
1034 long contlen, contrange;
1041 /* Whether authorization has been already tried. */
1042 int auth_tried_already = 0;
1044 /* Whether our connection to the remote host is through SSL. */
1048 struct response *resp;
1052 /* Whether this connection will be kept alive after the HTTP request
1056 /* Whether keep-alive should be inhibited. */
1057 int inhibit_keep_alive = !opt.http_keep_alive;
1059 /* Headers sent when using POST. */
1060 long post_data_size = 0;
1062 int host_lookup_failed = 0;
1065 if (u->scheme == SCHEME_HTTPS)
1067 /* Initialize the SSL context. After this has once been done,
1068 it becomes a no-op. */
1069 switch (ssl_init ())
1071 case SSLERRCTXCREATE:
1073 logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
1074 return SSLERRCTXCREATE;
1075 case SSLERRCERTFILE:
1076 /* try without certfile */
1077 logprintf (LOG_NOTQUIET,
1078 _("Failed to load certificates from %s\n"),
1080 logprintf (LOG_NOTQUIET,
1081 _("Trying without the specified certificate\n"));
1084 logprintf (LOG_NOTQUIET,
1085 _("Failed to get certificate key from %s\n"),
1087 logprintf (LOG_NOTQUIET,
1088 _("Trying without the specified certificate\n"));
1094 #endif /* HAVE_SSL */
1096 if (!(*dt & HEAD_ONLY))
1097 /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1098 know the local filename so we can save to it. */
1099 assert (*hs->local_file != NULL);
1101 auth_tried_already = 0;
1103 /* Initialize certain elements of struct http_stat. */
1108 hs->remote_time = NULL;
1116 char *proxy_user, *proxy_passwd;
1117 /* For normal username and password, URL components override
1118 command-line/wgetrc parameters. With proxy
1119 authentication, it's the reverse, because proxy URLs are
1120 normally the "permanent" ones, so command-line args
1121 should take precedence. */
1122 if (opt.proxy_user && opt.proxy_passwd)
1124 proxy_user = opt.proxy_user;
1125 proxy_passwd = opt.proxy_passwd;
1129 proxy_user = proxy->user;
1130 proxy_passwd = proxy->passwd;
1132 /* #### This does not appear right. Can't the proxy request,
1133 say, `Digest' authentication? */
1134 if (proxy_user && proxy_passwd)
1135 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1137 /* If we're using a proxy, we will be connecting to the proxy
1142 /* Prepare the request to send. */
1144 req = request_new ();
1146 const char *meth = "GET";
1147 if (*dt & HEAD_ONLY)
1149 else if (opt.post_file_name || opt.post_data)
1151 /* Use the full path, i.e. one that includes the leading slash and
1152 the query string. E.g. if u->path is "foo/bar" and u->query is
1153 "param=value", full_path will be "/foo/bar?param=value". */
1154 request_set_method (req, meth,
1155 proxy ? xstrdup (u->url) : url_full_path (u));
1158 request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1159 if (*dt & SEND_NOCACHE)
1160 request_set_header (req, "Pragma", "no-cache", rel_none);
1162 request_set_header (req, "Range",
1163 aprintf ("bytes=%ld-", hs->restval), rel_value);
1165 request_set_header (req, "User-Agent", opt.useragent, rel_none);
1167 request_set_header (req, "User-Agent",
1168 aprintf ("Wget/%s", version_string), rel_value);
1169 request_set_header (req, "Accept", "*/*", rel_none);
1171 /* Find the username and password for authentication. */
1174 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1175 user = user ? user : opt.http_user;
1176 passwd = passwd ? passwd : opt.http_passwd;
1180 /* We have the username and the password, but haven't tried
1181 any authorization yet. Let's see if the "Basic" method
1182 works. If not, we'll come back here and construct a
1183 proper authorization method with the right challenges.
1185 If we didn't employ this kind of logic, every URL that
1186 requires authorization would have to be processed twice,
1187 which is very suboptimal and generates a bunch of false
1188 "unauthorized" errors in the server log.
1190 #### But this logic also has a serious problem when used
1191 with stronger authentications: we *first* transmit the
1192 username and the password in clear text, and *then* attempt a
1193 stronger authentication scheme. That cannot be right! We
1194 are only fortunate that almost everyone still uses the
1195 `Basic' scheme anyway.
1197 There should be an option to prevent this from happening, for
1198 those who use strong authentication schemes and value their
1200 request_set_header (req, "Authorization",
1201 basic_authentication_encode (user, passwd),
1206 /* Whether we need to print the host header with braces around
1207 host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1208 usual "Host: symbolic-name:1234". */
1209 int squares = strchr (u->host, ':') != NULL;
1210 if (u->port == scheme_default_port (u->scheme))
1211 request_set_header (req, "Host",
1212 aprintf (squares ? "[%s]" : "%s", u->host),
1215 request_set_header (req, "Host",
1216 aprintf (squares ? "[%s]:%d" : "%s:%d",
1221 if (!inhibit_keep_alive)
1222 request_set_header (req, "Connection", "Keep-Alive", rel_none);
1225 request_set_header (req, "Cookie",
1226 cookie_header (wget_cookie_jar,
1227 u->host, u->port, u->path,
1229 u->scheme == SCHEME_HTTPS
1236 if (opt.post_data || opt.post_file_name)
1238 request_set_header (req, "Content-Type",
1239 "application/x-www-form-urlencoded", rel_none);
1241 post_data_size = strlen (opt.post_data);
1244 post_data_size = file_size (opt.post_file_name);
1245 if (post_data_size == -1)
1247 logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
1248 opt.post_file_name);
1252 request_set_header (req, "Content-Length",
1253 aprintf ("Content-Length: %ld", post_data_size),
1257 /* Add the user headers. */
1258 if (opt.user_headers)
1261 for (i = 0; opt.user_headers[i]; i++)
1262 request_set_user_header (req, opt.user_headers[i]);
1266 /* We need to come back here when the initial attempt to retrieve
1267 without authorization header fails. (Expected to happen at least
1268 for the Digest authorization scheme.) */
1272 /* Establish the connection. */
1274 if (!inhibit_keep_alive)
1276 /* Look for a persistent connection to target host, unless a
1277 proxy is used. The exception is when SSL is in use, in which
1278 case the proxy is nothing but a passthrough to the target
1279 host, registered as a connection to the latter. */
1280 struct url *relevant = conn;
1282 if (u->scheme == SCHEME_HTTPS)
1286 if (persistent_available_p (relevant->host, relevant->port,
1288 relevant->scheme == SCHEME_HTTPS,
1292 &host_lookup_failed))
1294 sock = pconn.socket;
1295 using_ssl = pconn.ssl;
1296 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1297 pconn.host, pconn.port);
1298 DEBUGP (("Reusing fd %d.\n", sock));
1304 /* In its current implementation, persistent_available_p will
1305 look up conn->host in some cases. If that lookup failed, we
1306 don't need to bother with connect_to_host. */
1307 if (host_lookup_failed)
1310 sock = connect_to_host (conn->host, conn->port);
1314 return (retryable_socket_connect_error (errno)
1315 ? CONERROR : CONIMPOSSIBLE);
1318 if (proxy && u->scheme == SCHEME_HTTPS)
1320 /* When requesting SSL URLs through proxies, use the
1321 CONNECT method to request passthrough. */
1322 struct request *connreq = request_new ();
1323 request_set_method (connreq, "CONNECT",
1324 aprintf ("%s:%d", u->host, u->port));
1327 request_set_header (connreq, "Proxy-Authorization",
1328 proxyauth, rel_value);
1329 /* Now that PROXYAUTH is part of the CONNECT request,
1330 zero it out so we don't send proxy authorization with
1331 the regular request below. */
1335 write_error = request_send (connreq, sock);
1336 request_free (connreq);
1337 if (write_error < 0)
1339 logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1341 CLOSE_INVALIDATE (sock);
1345 head = fd_read_http_head (sock);
1348 logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1350 CLOSE_INVALIDATE (sock);
1359 DEBUGP (("proxy responded with: [%s]\n", head));
1361 resp = response_new (head);
1362 statcode = response_status (resp, &message);
1363 response_free (resp);
1364 if (statcode != 200)
1367 logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1368 message ? message : "?");
1369 xfree_null (message);
1374 /* SOCK is now *really* connected to u->host, so update CONN
1375 to reflect this. That way register_persistent will
1376 register SOCK as being connected to u->host:u->port. */
1380 if (conn->scheme == SCHEME_HTTPS)
1382 if (!ssl_connect (sock))
1389 #endif /* HAVE_SSL */
1392 /* Send the request to server. */
1393 write_error = request_send (req, sock);
1395 if (write_error >= 0)
1399 DEBUGP (("[POST data: %s]\n", opt.post_data));
1400 write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1402 else if (opt.post_file_name && post_data_size != 0)
1403 write_error = post_file (sock, opt.post_file_name, post_data_size);
1406 if (write_error < 0)
1408 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1410 CLOSE_INVALIDATE (sock);
1414 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1415 proxy ? "Proxy" : "HTTP");
1422 head = fd_read_http_head (sock);
1427 logputs (LOG_NOTQUIET, _("No data received.\n"));
1428 CLOSE_INVALIDATE (sock);
1434 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1436 CLOSE_INVALIDATE (sock);
1441 DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1443 resp = response_new (head);
1445 /* Check for status line. */
1447 statcode = response_status (resp, &message);
1448 if (!opt.server_response)
1449 logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
1452 logprintf (LOG_VERBOSE, "\n");
1453 print_server_response (resp, " ");
1456 if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1457 contlen = strtol (hdrval, NULL, 10);
1459 /* Check for keep-alive related responses. */
1460 if (!inhibit_keep_alive && contlen != -1)
1462 if (response_header_copy (resp, "Keep-Alive", NULL, 0))
1464 else if (response_header_copy (resp, "Connection", hdrval,
1467 if (0 == strcasecmp (hdrval, "Keep-Alive"))
1472 /* The server has promised that it will not close the connection
1473 when we're done. This means that we can register it. */
1474 register_persistent (conn->host, conn->port, sock, using_ssl);
1476 if (statcode == HTTP_STATUS_UNAUTHORIZED)
1478 /* Authorization is required. */
1479 skip_short_body (sock, contlen);
1480 CLOSE_FINISH (sock);
1481 if (auth_tried_already || !(user && passwd))
1483 /* If we have tried it already, then there is no point
1485 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1489 char *www_authenticate = response_header_strdup (resp,
1490 "WWW-Authenticate");
1491 /* If the authentication scheme is unknown or if it's the
1492 "Basic" authentication (which we try by default), there's
1493 no sense in retrying. */
1494 if (!www_authenticate
1495 || !known_authentication_scheme_p (www_authenticate)
1496 || BEGINS_WITH (www_authenticate, "Basic"))
1498 xfree_null (www_authenticate);
1499 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1504 auth_tried_already = 1;
1505 pth = url_full_path (u);
1506 request_set_header (req, "Authorization",
1507 create_authorization_line (www_authenticate,
1509 request_method (req),
1513 xfree (www_authenticate);
1514 goto retry_with_auth;
1522 hs->statcode = statcode;
1524 hs->error = xstrdup (_("Malformed status line"));
1526 hs->error = xstrdup (_("(no description)"));
1528 hs->error = xstrdup (message);
1530 type = response_header_strdup (resp, "Content-Type");
1533 char *tmp = strchr (type, ';');
1536 while (tmp > type && ISSPACE (tmp[-1]))
1541 hs->newloc = response_header_strdup (resp, "Location");
1542 hs->remote_time = response_header_strdup (resp, "Last-Modified");
1544 char *set_cookie = response_header_strdup (resp, "Set-Cookie");
1547 /* The jar should have been created by now. */
1548 assert (wget_cookie_jar != NULL);
1549 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
1554 if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1556 long first_byte_pos, last_byte_pos, entity_length;
1557 if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1559 contrange = first_byte_pos;
1561 response_free (resp);
1563 /* 20x responses are counted among successful by default. */
1564 if (H_20X (statcode))
1567 /* Return if redirected. */
1568 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1570 /* RFC2068 says that in case of the 300 (multiple choices)
1571 response, the server can output a preferred URL through
1572 `Location' header; otherwise, the request should be treated
1573 like GET. So, if the location is set, it will be a
1574 redirection; otherwise, just proceed normally. */
1575 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1579 logprintf (LOG_VERBOSE,
1580 _("Location: %s%s\n"),
1581 hs->newloc ? hs->newloc : _("unspecified"),
1582 hs->newloc ? _(" [following]") : "");
1584 skip_short_body (sock, contlen);
1585 CLOSE_FINISH (sock);
1591 /* If content-type is not given, assume text/html. This is because
1592 of the multitude of broken CGI's that "forget" to generate the
1595 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1596 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1601 if (opt.html_extension && (*dt & TEXTHTML))
1602 /* -E / --html-extension / html_extension = on was specified, and this is a
1603 text/html file. If some case-insensitive variation on ".htm[l]" isn't
1604 already the file's suffix, tack on ".html". */
1606 char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
1608 if (last_period_in_local_filename == NULL
1609 || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1610 || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1612 size_t local_filename_len = strlen(*hs->local_file);
1614 *hs->local_file = xrealloc(*hs->local_file,
1615 local_filename_len + sizeof(".html"));
1616 strcpy(*hs->local_file + local_filename_len, ".html");
1618 *dt |= ADDED_HTML_EXTENSION;
1622 if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
1624 /* If `-c' is in use and the file has been fully downloaded (or
1625 the remote file has shrunk), Wget effectively requests bytes
1626 after the end of file and the server responds with 416. */
1627 logputs (LOG_VERBOSE, _("\
1628 \n The file is already fully retrieved; nothing to do.\n\n"));
1629 /* In case the caller inspects. */
1632 /* Mark as successfully retrieved. */
1635 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1636 might be more bytes in the body. */
1637 return RETRUNNEEDED;
1639 if ((contrange != 0 && contrange != hs->restval)
1640 || (H_PARTIAL (statcode) && !contrange))
1642 /* The Range request was somehow misunderstood by the server.
1645 CLOSE_INVALIDATE (sock);
1648 hs->contlen = contlen + contrange;
1654 /* No need to print this output if the body won't be
1655 downloaded at all, or if the original server response is
1657 logputs (LOG_VERBOSE, _("Length: "));
1660 logputs (LOG_VERBOSE, legible (contlen + contrange));
1662 logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen));
1665 logputs (LOG_VERBOSE,
1666 opt.ignore_length ? _("ignored") : _("unspecified"));
1668 logprintf (LOG_VERBOSE, " [%s]\n", type);
1670 logputs (LOG_VERBOSE, "\n");
1674 type = NULL; /* We don't need it any more. */
1676 /* Return if we have no intention of further downloading. */
1677 if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1679 /* In case the caller cares to look... */
1683 /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
1684 servers not to send body in response to a HEAD request. If
1685 you encounter such a server (more likely a broken CGI), use
1686 `--no-http-keep-alive'. */
1687 CLOSE_FINISH (sock);
1688 return RETRFINISHED;
1691 /* Open the local file. */
1694 mkalldirs (*hs->local_file);
1696 rotate_backups (*hs->local_file);
1697 fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
1700 logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1701 CLOSE_INVALIDATE (sock);
1708 /* #### This confuses the timestamping code that checks for file
1709 size. Maybe we should save some additional information? */
1710 if (opt.save_headers)
1711 fwrite (head, 1, strlen (head), fp);
1713 /* Download the request body. */
1716 flags |= rb_read_exactly;
1717 if (hs->restval > 0 && contrange == 0)
1718 flags |= rb_skip_startpos;
1719 hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
1720 hs->restval, &hs->len, &hs->dltime, flags);
1721 hs->len += hs->restval;
1724 CLOSE_FINISH (sock);
1726 CLOSE_INVALIDATE (sock);
1729 /* Close or flush the file. We have to be careful to check for
1730 error here. Checking the result of fwrite() is not enough --
1731 errors could go unnoticed! */
1734 flush_res = fclose (fp);
1736 flush_res = fflush (fp);
1737 if (flush_res == EOF)
1742 return RETRFINISHED;
1745 /* The genuine HTTP loop! This is the part where the retrieval is
1746 retried, and retried, and retried, and... */
1748 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1749 int *dt, struct url *proxy)
1752 int use_ts, got_head = 0; /* time-stamping info */
1753 char *filename_plus_orig_suffix;
1754 char *local_filename = NULL;
1755 char *tms, *locf, *tmrate;
1757 time_t tml = -1, tmr = -1; /* local and remote time-stamps */
1758 long local_size = 0; /* the size of the local file */
1759 size_t filename_len;
1760 struct http_stat hstat; /* HTTP status */
1764 /* This used to be done in main(), but it's a better idea to do it
1765 here so that we don't go through the hoops if we're just using
1769 if (!wget_cookie_jar)
1770 wget_cookie_jar = cookie_jar_new ();
1771 if (opt.cookies_input && !cookies_loaded_p)
1773 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1774 cookies_loaded_p = 1;
1780 /* Warn on (likely bogus) wildcard usage in HTTP. Don't use
1781 has_wildcards_p because it would also warn on `?', and we know that
1782 shows up in CGI paths a *lot*. */
1783 if (strchr (u->url, '*'))
1784 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1788 /* Determine the local filename. */
1789 if (local_file && *local_file)
1790 hstat.local_file = local_file;
1791 else if (local_file)
1793 *local_file = url_file_name (u);
1794 hstat.local_file = local_file;
1798 dummy = url_file_name (u);
1799 hstat.local_file = &dummy;
1802 if (!opt.output_document)
1803 locf = *hstat.local_file;
1805 locf = opt.output_document;
1807 hstat.referer = referer;
1809 filename_len = strlen (*hstat.local_file);
1810 filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1812 if (opt.noclobber && file_exists_p (*hstat.local_file))
1814 /* If opt.noclobber is turned on and file already exists, do not
1815 retrieve the file */
1816 logprintf (LOG_VERBOSE, _("\
1817 File `%s' already there, will not retrieve.\n"), *hstat.local_file);
1818 /* If the file is there, we suppose it's retrieved OK. */
1821 /* #### Bogusness alert. */
1822 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1823 if (has_html_suffix_p (*hstat.local_file))
1831 if (opt.timestamping)
1833 int local_dot_orig_file_exists = 0;
1835 if (opt.backup_converted)
1836 /* If -K is specified, we'll act on the assumption that it was specified
1837 last time these files were downloaded as well, and instead of just
1838 comparing local file X against server file X, we'll compare local
1839 file X.orig (if extant, else X) against server file X. If -K
1840 _wasn't_ specified last time, or the server contains files called
1841 *.orig, -N will be back to not operating correctly with -k. */
1843 /* Would a single s[n]printf() call be faster? --dan
1845 Definitely not. sprintf() is horribly slow. It's a
1846 different question whether the difference between the two
1847 affects a program. Usually I'd say "no", but at one
1848 point I profiled Wget, and found that a measurable and
1849 non-negligible amount of time was lost calling sprintf()
1850 in url.c. Replacing sprintf with inline calls to
1851 strcpy() and long_to_string() made a difference.
1853 memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
1854 memcpy (filename_plus_orig_suffix + filename_len,
1855 ".orig", sizeof (".orig"));
1857 /* Try to stat() the .orig file. */
1858 if (stat (filename_plus_orig_suffix, &st) == 0)
1860 local_dot_orig_file_exists = 1;
1861 local_filename = filename_plus_orig_suffix;
1865 if (!local_dot_orig_file_exists)
1866 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1867 if (stat (*hstat.local_file, &st) == 0)
1868 local_filename = *hstat.local_file;
1870 if (local_filename != NULL)
1871 /* There was a local file, so we'll check later to see if the version
1872 the server has is the same version we already have, allowing us to
1878 /* Modification time granularity is 2 seconds for Windows, so
1879 increase local time by 1 second for later comparison. */
1882 local_size = st.st_size;
1886 /* Reset the counter. */
1892 /* Increment the pass counter. */
1894 sleep_between_retrievals (count);
1895 /* Get the current time string. */
1896 tms = time_str (NULL);
1897 /* Print fetch message, if opt.verbose. */
1900 char *hurl = url_string (u, 1);
1904 sprintf (tmp, _("(try:%2d)"), count);
1905 logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
1906 tms, hurl, tmp, locf);
1908 ws_changetitle (hurl, 1);
1913 /* Default document type is empty. However, if spider mode is
1914 on or time-stamping is employed, HEAD_ONLY commands is
1915 encoded within *dt. */
1916 if (opt.spider || (use_ts && !got_head))
1921 /* Decide whether or not to restart. */
1924 hstat.restval = hstat.len; /* continue where we left off */
1925 else if (opt.always_rest
1926 && stat (locf, &st) == 0
1927 && S_ISREG (st.st_mode))
1928 hstat.restval = st.st_size;
1930 /* Decide whether to send the no-cache directive. We send it in
1932 a) we're using a proxy, and we're past our first retrieval.
1933 Some proxies are notorious for caching incomplete data, so
1934 we require a fresh get.
1935 b) caching is explicitly inhibited. */
1936 if ((proxy && count > 1) /* a */
1937 || !opt.allow_cache /* b */
1939 *dt |= SEND_NOCACHE;
1941 *dt &= ~SEND_NOCACHE;
1943 /* Try fetching the document, or at least its head. */
1944 err = gethttp (u, &hstat, dt, proxy);
1946 /* It's unfortunate that wget determines the local filename before finding
1947 out the Content-Type of the file. Barring a major restructuring of the
1948 code, we need to re-set locf here, since gethttp() may have xrealloc()d
1949 *hstat.local_file to tack on ".html". */
1950 if (!opt.output_document)
1951 locf = *hstat.local_file;
1953 locf = opt.output_document;
1956 tms = time_str (NULL);
1957 /* Get the new location (with or without the redirection). */
1959 *newloc = xstrdup (hstat.newloc);
1962 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
1963 case CONERROR: case READERR: case WRITEFAILED:
1965 /* Non-fatal errors continue executing the loop, which will
1966 bring them to "while" statement at the end, to judge
1967 whether the number of tries was exceeded. */
1968 free_hstat (&hstat);
1969 printwhat (count, opt.ntry);
1972 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
1973 case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
1974 /* Fatal errors just return from the function. */
1975 free_hstat (&hstat);
1979 case FWRITEERR: case FOPENERR:
1980 /* Another fatal error. */
1981 logputs (LOG_VERBOSE, "\n");
1982 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
1983 *hstat.local_file, strerror (errno));
1984 free_hstat (&hstat);
1989 /* Another fatal error. */
1990 logputs (LOG_VERBOSE, "\n");
1991 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
1992 free_hstat (&hstat);
1997 /* Return the new location to the caller. */
2000 logprintf (LOG_NOTQUIET,
2001 _("ERROR: Redirection (%d) without location.\n"),
2003 free_hstat (&hstat);
2007 free_hstat (&hstat);
2012 /* The file was already fully retrieved. */
2013 free_hstat (&hstat);
2018 /* Deal with you later. */
2021 /* All possibilities should have been exhausted. */
2024 if (!(*dt & RETROKF))
2028 /* #### Ugly ugly ugly! */
2029 char *hurl = url_string (u, 1);
2030 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2033 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2034 tms, hstat.statcode, hstat.error);
2035 logputs (LOG_VERBOSE, "\n");
2036 free_hstat (&hstat);
2041 /* Did we get the time-stamp? */
2044 if (opt.timestamping && !hstat.remote_time)
2046 logputs (LOG_NOTQUIET, _("\
2047 Last-modified header missing -- time-stamps turned off.\n"));
2049 else if (hstat.remote_time)
2051 /* Convert the date-string into struct tm. */
2052 tmr = http_atotm (hstat.remote_time);
2053 if (tmr == (time_t) (-1))
2054 logputs (LOG_VERBOSE, _("\
2055 Last-modified header invalid -- time-stamp ignored.\n"));
2059 /* The time-stamping section. */
2064 use_ts = 0; /* no more time-stamping */
2065 count = 0; /* the retrieve count for HEAD is
2067 if (hstat.remote_time && tmr != (time_t) (-1))
2069 /* Now time-stamping can be used validly. Time-stamping
2070 means that if the sizes of the local and remote file
2071 match, and local file is newer than the remote file,
2072 it will not be retrieved. Otherwise, the normal
2073 download procedure is resumed. */
2075 (hstat.contlen == -1 || local_size == hstat.contlen))
2077 logprintf (LOG_VERBOSE, _("\
2078 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2080 free_hstat (&hstat);
2084 else if (tml >= tmr)
2085 logprintf (LOG_VERBOSE, _("\
2086 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
2088 logputs (LOG_VERBOSE,
2089 _("Remote file is newer, retrieving.\n"));
2091 free_hstat (&hstat);
2094 if ((tmr != (time_t) (-1))
2096 && ((hstat.len == hstat.contlen) ||
2097 ((hstat.res == 0) &&
2098 ((hstat.contlen == -1) ||
2099 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2101 /* #### This code repeats in http.c and ftp.c. Move it to a
2103 const char *fl = NULL;
2104 if (opt.output_document)
2106 if (output_stream_regular)
2107 fl = opt.output_document;
2110 fl = *hstat.local_file;
2114 /* End of time-stamping section. */
2118 logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
2123 tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
2125 if (hstat.len == hstat.contlen)
2129 logprintf (LOG_VERBOSE,
2130 _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
2131 tms, tmrate, locf, hstat.len, hstat.contlen);
2132 logprintf (LOG_NONVERBOSE,
2133 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2134 tms, u->url, hstat.len, hstat.contlen, locf, count);
2137 total_downloaded_bytes += hstat.len;
2139 /* Remember that we downloaded the file for later ".orig" code. */
2140 if (*dt & ADDED_HTML_EXTENSION)
2141 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2143 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2145 free_hstat (&hstat);
2149 else if (hstat.res == 0) /* No read error */
2151 if (hstat.contlen == -1) /* We don't know how much we were supposed
2152 to get, so assume we succeeded. */
2156 logprintf (LOG_VERBOSE,
2157 _("%s (%s) - `%s' saved [%ld]\n\n"),
2158 tms, tmrate, locf, hstat.len);
2159 logprintf (LOG_NONVERBOSE,
2160 "%s URL:%s [%ld] -> \"%s\" [%d]\n",
2161 tms, u->url, hstat.len, locf, count);
2164 total_downloaded_bytes += hstat.len;
2166 /* Remember that we downloaded the file for later ".orig" code. */
2167 if (*dt & ADDED_HTML_EXTENSION)
2168 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2170 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2172 free_hstat (&hstat);
2176 else if (hstat.len < hstat.contlen) /* meaning we lost the
2177 connection too soon */
2179 logprintf (LOG_VERBOSE,
2180 _("%s (%s) - Connection closed at byte %ld. "),
2181 tms, tmrate, hstat.len);
2182 printwhat (count, opt.ntry);
2183 free_hstat (&hstat);
2186 else if (!opt.kill_longer) /* meaning we got more than expected */
2188 logprintf (LOG_VERBOSE,
2189 _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
2190 tms, tmrate, locf, hstat.len, hstat.contlen);
2191 logprintf (LOG_NONVERBOSE,
2192 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2193 tms, u->url, hstat.len, hstat.contlen, locf, count);
2195 total_downloaded_bytes += hstat.len;
2197 /* Remember that we downloaded the file for later ".orig" code. */
2198 if (*dt & ADDED_HTML_EXTENSION)
2199 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2201 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2203 free_hstat (&hstat);
2207 else /* the same, but not accepted */
2209 logprintf (LOG_VERBOSE,
2210 _("%s (%s) - Connection closed at byte %ld/%ld. "),
2211 tms, tmrate, hstat.len, hstat.contlen);
2212 printwhat (count, opt.ntry);
2213 free_hstat (&hstat);
2217 else /* now hstat.res can only be -1 */
2219 if (hstat.contlen == -1)
2221 logprintf (LOG_VERBOSE,
2222 _("%s (%s) - Read error at byte %ld (%s)."),
2223 tms, tmrate, hstat.len, strerror (errno));
2224 printwhat (count, opt.ntry);
2225 free_hstat (&hstat);
2228 else /* hstat.res == -1 and contlen is given */
2230 logprintf (LOG_VERBOSE,
2231 _("%s (%s) - Read error at byte %ld/%ld (%s). "),
2232 tms, tmrate, hstat.len, hstat.contlen,
2234 printwhat (count, opt.ntry);
2235 free_hstat (&hstat);
2242 while (!opt.ntry || (count < opt.ntry));
2246 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
2247 than local timezone.
2249 mktime is similar but assumes struct tm, also known as the
2250 "broken-down" form of time, is in local time zone. mktime_from_utc
2251 uses mktime to make the conversion understanding that an offset
2252 will be introduced by the local time assumption.
2254 mktime_from_utc then measures the introduced offset by applying
2255 gmtime to the initial result and applying mktime to the resulting
2256 "broken-down" form. The difference between the two mktime results
2257 is the measured offset which is then subtracted from the initial
2258 mktime result to yield a calendar time which is the value returned.
2260 tm_isdst in struct tm is set to 0 to force mktime to introduce a
2261 consistent offset (the non DST offset) since tm and tm+o might be
2262 on opposite sides of a DST change.
2264 Some implementations of mktime return -1 for the nonexistent
2265 localtime hour at the beginning of DST. In this event, use
2266 mktime(tm - 1hr) + 3600.
2270 gmtime(t+o) --> tm+o
2271 mktime(tm+o) --> t+2o
2272 t+o - (t+2o - t+o) = t
2274 Note that glibc contains a function of the same purpose named
2275 `timegm' (reverse of gmtime). But obviously, it is not universally
2276 available, and unfortunately it is not straightforwardly
2277 extractable for use here. Perhaps configure should detect timegm
2278 and use it where available.
2280 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
2281 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
2282 Further improved by Roger with assistance from Edward J. Sabol
2283 based on input by Jamie Zawinski. */
2286 mktime_from_utc (struct tm *t)
2297 return -1; /* can't deal with output from strptime */
2308 return -1; /* can't deal with output from gmtime */
2311 return (tl - (tb - tl));
/* Decide whether a strptime() result denotes a successful parse.

   strptime() hands back a pointer to the first character it did not
   consume.  The parse counts as successful when, after optional
   whitespace, that position is the end of the string, the literal
   `GMT', or a numeric zone offset (`+' or `-' followed by a digit).

   Put as an extended regexp: return 1 if P matches
   "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  A NULL P, which strptime() may
   return, is treated as failure and yields 0.  */
static int
check_end (const char *p)
{
  const char *cur = p;

  /* strptime() reports an outright failure as NULL.  */
  if (cur == NULL)
    return 0;

  /* Skip any whitespace that follows the parsed portion.  */
  for (; ISSPACE (*cur); cur++)
    ;

  /* End of string.  */
  if (*cur == '\0')
    return 1;

  /* Literal "GMT".  */
  if (cur[0] == 'G' && cur[1] == 'M' && cur[2] == 'T')
    return 1;

  /* Numeric offset such as "+0100" or "-5".  */
  if ((cur[0] == '+' || cur[0] == '-') && ISDIGIT (cur[1]))
    return 1;

  return 0;
}
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2068 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date
   (a pseudo-RFC850 form with a four-digit year is tried as well).
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as I would like it to be.  Being based on
   strptime, it always allows shortened months, one-digit days, etc.,
   but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   I've investigated free and PD code for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
   distributed with phttpd, is excellent, but we cannot use it because
   it is not assigned to the FSF.  So I stuck with strptime.  */
time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",		/* RFC1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",		/* RFC850:  Thursday, 29-Jan-98 22:12:57 */
    "%a, %d-%b-%Y %T",		/* pseudo-RFC850:  Thu, 29-Jan-1998 22:12:57
				   (google.com uses this for their cookies.) */
    "%a %b %d %T %Y"		/* asctime: Thu Jan 29 22:12:57 1998 */
  };
  int i;

  /* Note that under foreign locales Solaris strptime() fails to
     recognize English dates, which renders this function useless.  We
     solve this by being careful not to affect LC_TIME when
     initializing locale.

     Another solution would be to temporarily set locale to C, invoke
     strptime(), and restore it back.  This is slow and dirty,
     however, and locale support other than LC_MESSAGES can mess other
     things, so I rather chose to stick with just setting LC_MESSAGES.

     GNU strptime does not have this problem because it recognizes
     both international and local dates.  */

  for (i = 0; i < countof (time_formats); i++)
    {
      /* strptime() only stores the fields named by the format and
	 leaves everything else untouched.  Start every attempt from a
	 zeroed structure so that neither uninitialized stack contents
	 nor fields written by a previous, failed format can leak into
	 the result.  Zeroing also sets tm_isdst to 0, which, according
	 to Roger Beeman, we need to initialize, since strptime won't
	 do it.  */
      struct tm t = { 0 };

      if (check_end (strptime (time_string, time_formats[i], &t)))
	return mktime_from_utc (&t);
    }

  /* All formats have failed.  */
  return -1;
}
2408 /* Authorization support: We support two authorization schemes:
2410 * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
2412 * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
2413 consisting of answering to the server's challenge with the proper
2416 /* How many bytes it will take to store LEN bytes in base64. */
2417 #define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))
/* Encode the LENGTH bytes starting at S to base64 format and place the
   result in STORE.  STORE will be 0-terminated, and must point to a
   writable buffer of at least 1+BASE64_LENGTH(length) bytes.

   Only the LENGTH bytes of S are read, so S need not be 0-terminated,
   and the bytes are treated as unsigned, so values above 0x7f are
   encoded correctly.  A trailing group of one or two bytes is padded
   with `=' as base64 requires.  */
static void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  /* Read the input as unsigned bytes: indexing TBL with a plain
     (possibly signed) char would yield a negative index for bytes
     with the high bit set.  */
  const unsigned char *in = (const unsigned char *) s;
  char *p = store;
  int i;

  /* Transform each complete 3x8-bit group to 4x6 bits, as required by
     base64.  */
  for (i = 0; i + 2 < length; i += 3)
    {
      *p++ = tbl[in[i] >> 2];
      *p++ = tbl[((in[i] & 3) << 4) | (in[i + 1] >> 4)];
      *p++ = tbl[((in[i + 1] & 0xf) << 2) | (in[i + 2] >> 6)];
      *p++ = tbl[in[i + 2] & 0x3f];
    }

  /* Encode the trailing group of one or two bytes, if any, without
     touching memory past the end of the input, and pad it.  */
  if (i < length)
    {
      int have_second = (i + 1 < length);
      unsigned int b0 = in[i];
      unsigned int b1 = have_second ? in[i + 1] : 0;

      *p++ = tbl[b0 >> 2];
      *p++ = tbl[((b0 & 3) << 4) | (b1 >> 4)];
      *p++ = have_second ? tbl[(b1 & 0xf) << 2] : '=';
      *p++ = '=';
    }

  /* ...and zero-terminate it.  */
  *p = '\0';
}
/* Build the value of an authorization header for the `Basic' scheme.

   The string `USER:PASSWD' is encoded in base64 and the text `Basic '
   is put in front of it.  The result is a freshly xmalloc'ed,
   0-terminated string which the caller is responsible for freeing.  */
static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  static const char prefix[] = "Basic ";
  int plain_len = strlen (user) + 1 + strlen (passwd);
  int encoded_len = BASE64_LENGTH (plain_len);
  char *plain, *header;

  /* Assemble the cleartext credentials in scratch stack space.  */
  plain = (char *) alloca (plain_len + 1);
  sprintf (plain, "%s:%s", user, passwd);

  /* Room for the prefix, the base64 text and the terminating zero.
     base64_encode 0-terminates its output, so it can write straight
     behind the prefix.  */
  header = (char *) xmalloc (sizeof (prefix) - 1 + encoded_len + 1);
  strcpy (header, prefix);
  base64_encode (plain, header + (sizeof (prefix) - 1), plain_len);

  return header;
}
2479 #define SKIP_WS(x) do { \
2480 while (ISSPACE (*(x))) \
2485 /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
2486 of a field in such a header. If the field is the one specified by
2487 ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
2488 digest authorization code), extract its value in the (char*)
2489 variable pointed by RET. Returns negative on a malformed header,
2490 or number of bytes that have been parsed by this call. */
2492 extract_header_attr (const char *au, const char *attr_name, char **ret)
2494 const char *cp, *ep;
2498 if (strncmp (cp, attr_name, strlen (attr_name)) == 0)
2500 cp += strlen (attr_name);
2513 for (ep = cp; *ep && *ep != '\"'; ep++)
2518 *ret = strdupdelim (cp, ep);
2525 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2526 an array of 16 bytes containing the hash keys, and BUF should be a
2527 buffer of 33 writable characters (32 for hex digits plus one for
2528 zero termination). */
2530 dump_hash (unsigned char *buf, const unsigned char *hash)
2534 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2536 *buf++ = XNUM_TO_digit (*hash >> 4);
2537 *buf++ = XNUM_TO_digit (*hash & 0xf);
2542 /* Take the line apart to find the challenge, and compose a digest
2543 authorization header. See RFC2069 section 2.1.2. */
2545 digest_authentication_encode (const char *au, const char *user,
2546 const char *passwd, const char *method,
2549 static char *realm, *opaque, *nonce;
2554 { "realm", &realm },
2555 { "opaque", &opaque },
2560 realm = opaque = nonce = NULL;
2562 au += 6; /* skip over `Digest' */
2568 for (i = 0; i < countof (options); i++)
2570 int skip = extract_header_attr (au, options[i].name,
2571 options[i].variable);
2575 xfree_null (opaque);
2585 if (i == countof (options))
2587 while (*au && *au != '=')
2595 while (*au && *au != '\"')
2602 while (*au && *au != ',')
2607 if (!realm || !nonce || !user || !passwd || !path || !method)
2610 xfree_null (opaque);
2615 /* Calculate the digest value. */
2617 ALLOCA_MD5_CONTEXT (ctx);
2618 unsigned char hash[MD5_HASHLEN];
2619 unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2620 unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2622 /* A1BUF = H(user ":" realm ":" password) */
2624 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2625 gen_md5_update ((unsigned char *)":", 1, ctx);
2626 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2627 gen_md5_update ((unsigned char *)":", 1, ctx);
2628 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2629 gen_md5_finish (ctx, hash);
2630 dump_hash (a1buf, hash);
2632 /* A2BUF = H(method ":" path) */
2634 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2635 gen_md5_update ((unsigned char *)":", 1, ctx);
2636 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2637 gen_md5_finish (ctx, hash);
2638 dump_hash (a2buf, hash);
2640 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2642 gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2643 gen_md5_update ((unsigned char *)":", 1, ctx);
2644 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2645 gen_md5_update ((unsigned char *)":", 1, ctx);
2646 gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2647 gen_md5_finish (ctx, hash);
2648 dump_hash (response_digest, hash);
2650 res = (char*) xmalloc (strlen (user)
2655 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2656 + (opaque ? strlen (opaque) : 0)
2658 sprintf (res, "Digest \
2659 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2660 user, realm, nonce, path, response_digest);
2663 char *p = res + strlen (res);
2664 strcat (p, ", opaque=\"");
2671 #endif /* USE_DIGEST */
2674 #define BEGINS_WITH(line, string_constant) \
2675 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
2676 && (ISSPACE (line[sizeof (string_constant) - 1]) \
2677 || !line[sizeof (string_constant) - 1]))
/* Return 1 if the `WWW-Authenticate' value AU starts with the name of
   an authentication scheme this file recognizes (`Basic', `Digest' or
   `NTLM', compared case-insensitively and followed by whitespace or
   the end of the string), 0 otherwise.  */
static int
known_authentication_scheme_p (const char *au)
{
  if (BEGINS_WITH (au, "Basic"))
    return 1;
  if (BEGINS_WITH (au, "Digest"))
    return 1;
  if (BEGINS_WITH (au, "NTLM"))
    return 1;
  return 0;
}
2689 /* Create the HTTP authorization request header. When the
2690 `WWW-Authenticate' response header is seen, according to the
2691 authorization scheme specified in that header (`Basic' and `Digest'
2692 are supported by the current implementation), produce an
2693 appropriate HTTP authorization request header. */
2695 create_authorization_line (const char *au, const char *user,
2696 const char *passwd, const char *method,
2699 if (0 == strncasecmp (au, "Basic", 5))
2700 return basic_authentication_encode (user, passwd);
2702 if (0 == strncasecmp (au, "Digest", 6))
2703 return digest_authentication_encode (au, user, passwd, method, path);
2704 #endif /* USE_DIGEST */