2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
35 #include <sys/types.h>
46 #if TIME_WITH_SYS_TIME
47 # include <sys/time.h>
51 # include <sys/time.h>
68 # include "gen_sslfunc.h"
76 extern char *version_string;
77 extern LARGE_INT total_downloaded_bytes;
/* Yield the smaller of X and Y.  Either argument may be evaluated
   twice, so do not pass expressions with side effects.  */
# define MIN(x, y) ((y) < (x) ? (y) : (x))
84 static int cookies_loaded_p;
85 struct cookie_jar *wget_cookie_jar;
87 #define TEXTHTML_S "text/html"
88 #define TEXTXHTML_S "application/xhtml+xml"
90 /* Some status code validation macros: */
/* True for any status code in the 2xx range.  X is evaluated twice.  */
#define H_20X(x) (200 <= (x) && (x) < 300)
/* True if X is the "partial contents" status code.  */
#define H_PARTIAL(x) (HTTP_STATUS_PARTIAL_CONTENTS == (x))
/* True if X is one of the redirection status codes that this file
   acts upon: moved permanently, moved temporarily, or temporary
   redirect.  X may be evaluated up to three times.  */
#define H_REDIRECTED(x) (HTTP_STATUS_MOVED_PERMANENTLY == (x) \
                         || HTTP_STATUS_MOVED_TEMPORARILY == (x) \
                         || HTTP_STATUS_TEMPORARY_REDIRECT == (x))
97 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
99 #define HTTP_STATUS_OK 200
100 #define HTTP_STATUS_CREATED 201
101 #define HTTP_STATUS_ACCEPTED 202
102 #define HTTP_STATUS_NO_CONTENT 204
103 #define HTTP_STATUS_PARTIAL_CONTENTS 206
105 /* Redirection 3xx. */
106 #define HTTP_STATUS_MULTIPLE_CHOICES 300
107 #define HTTP_STATUS_MOVED_PERMANENTLY 301
108 #define HTTP_STATUS_MOVED_TEMPORARILY 302
109 #define HTTP_STATUS_NOT_MODIFIED 304
110 #define HTTP_STATUS_TEMPORARY_REDIRECT 307
112 /* Client error 4xx. */
113 #define HTTP_STATUS_BAD_REQUEST 400
114 #define HTTP_STATUS_UNAUTHORIZED 401
115 #define HTTP_STATUS_FORBIDDEN 403
116 #define HTTP_STATUS_NOT_FOUND 404
118 /* Server errors 5xx. */
119 #define HTTP_STATUS_INTERNAL 500
120 #define HTTP_STATUS_NOT_IMPLEMENTED 501
121 #define HTTP_STATUS_BAD_GATEWAY 502
122 #define HTTP_STATUS_UNAVAILABLE 503
125 rel_none, rel_name, rel_value, rel_both
132 struct request_header {
134 enum rp release_policy;
136 int hcount, hcapacity;
139 /* Create a new, empty request. At least request_set_method must be
140 called before the request can be used. */
142 static struct request *
145 struct request *req = xnew0 (struct request);
147 req->headers = xnew_array (struct request_header, req->hcapacity);
151 /* Set the request's method and its arguments. METH should be a
152 literal string (or it should outlive the request) because it will
153 not be freed. ARG will be freed by request_free. */
156 request_set_method (struct request *req, const char *meth, char *arg)
162 /* Return the method string passed with the last call to
163 request_set_method. */
166 request_method (const struct request *req)
171 /* Free one header according to the release policy specified with
172 request_set_header. */
175 release_header (struct request_header *hdr)
177 switch (hdr->release_policy)
/* Set the request header named NAME to VALUE. Specifically, this means that
195 a "NAME: VALUE\r\n" header line will be used in the request. If a
196 header with the same name previously existed in the request, its
197 value will be replaced by this one.
199 RELEASE_POLICY determines whether NAME and VALUE should be released
200 (freed) with request_free. Allowed values are:
202 - rel_none - don't free NAME or VALUE
203 - rel_name - free NAME when done
204 - rel_value - free VALUE when done
205 - rel_both - free both NAME and VALUE when done
207 Setting release policy is useful when arguments come from different
208 sources. For example:
210 // Don't free literal strings!
211 request_set_header (req, "Pragma", "no-cache", rel_none);
213 // Don't free a global variable, we'll need it later.
214 request_set_header (req, "Referer", opt.referer, rel_none);
216 // Value freshly allocated, free it when done.
217 request_set_header (req, "Range", aprintf ("bytes=%ld-", hs->restval),
222 request_set_header (struct request *req, char *name, char *value,
223 enum rp release_policy)
225 struct request_header *hdr;
229 for (i = 0; i < req->hcount; i++)
231 hdr = &req->headers[i];
232 if (0 == strcasecmp (name, hdr->name))
234 /* Replace existing header. */
235 release_header (hdr);
238 hdr->release_policy = release_policy;
243 /* Install new header. */
245 if (req->hcount >= req->hcount)
247 req->hcapacity <<= 1;
248 req->headers = xrealloc (req->headers,
249 req->hcapacity * sizeof (struct request_header));
251 hdr = &req->headers[req->hcount++];
254 hdr->release_policy = release_policy;
257 /* Like request_set_header, but sets the whole header line, as
258 provided by the user using the `--header' option. For example,
259 request_set_user_header (req, "Foo: bar") works just like
260 request_set_header (req, "Foo", "bar"). */
263 request_set_user_header (struct request *req, const char *header)
266 const char *p = strchr (header, ':');
269 BOUNDED_TO_ALLOCA (header, p, name);
273 request_set_header (req, xstrdup (name), (char *) p, rel_name);
276 #define APPEND(p, str) do { \
277 int A_len = strlen (str); \
278 memcpy (p, str, A_len); \
282 /* Construct the request and write it to FD using fd_write. */
285 request_send (const struct request *req, int fd)
287 char *request_string, *p;
288 int i, size, write_error;
290 /* Count the request size. */
293 /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
294 size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
296 for (i = 0; i < req->hcount; i++)
298 struct request_header *hdr = &req->headers[i];
299 /* NAME ": " VALUE "\r\n" */
300 size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
306 p = request_string = alloca_array (char, size);
308 /* Generate the request. */
310 APPEND (p, req->method); *p++ = ' ';
311 APPEND (p, req->arg); *p++ = ' ';
312 memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
314 for (i = 0; i < req->hcount; i++)
316 struct request_header *hdr = &req->headers[i];
317 APPEND (p, hdr->name);
318 *p++ = ':', *p++ = ' ';
319 APPEND (p, hdr->value);
320 *p++ = '\r', *p++ = '\n';
323 *p++ = '\r', *p++ = '\n', *p++ = '\0';
324 assert (p - request_string == size);
328 DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
330 /* Send the request to the server. */
332 write_error = fd_write (fd, request_string, size - 1, -1);
334 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
339 /* Release the resources used by REQ. */
342 request_free (struct request *req)
345 xfree_null (req->arg);
346 for (i = 0; i < req->hcount; i++)
347 release_header (&req->headers[i]);
348 xfree_null (req->headers);
352 /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
353 PROMISED_SIZE bytes are sent over the wire -- if the file is
354 longer, read only that much; if the file is shorter, report an error. */
357 post_file (int sock, const char *file_name, long promised_size)
359 static char chunk[8192];
364 DEBUGP (("[writing POST file %s ... ", file_name));
366 fp = fopen (file_name, "rb");
369 while (!feof (fp) && written < promised_size)
372 int length = fread (chunk, 1, sizeof (chunk), fp);
375 towrite = MIN (promised_size - written, length);
376 write_error = fd_write (sock, chunk, towrite, -1);
386 /* If we've written less than was promised, report a (probably
387 nonsensical) error rather than break the promise. */
388 if (written < promised_size)
394 assert (written == promised_size);
395 DEBUGP (("done]\n"));
400 head_terminator (const char *hunk, int oldlen, int peeklen)
402 const char *start, *end;
404 /* If at first peek, verify whether HUNK starts with "HTTP". If
405 not, this is a HTTP/0.9 request and we must bail out without
407 if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
413 start = hunk + oldlen - 4;
414 end = hunk + oldlen + peeklen;
416 for (; start < end - 1; start++)
423 if (start[1] == '\n')
/* Read the HTTP response head from FD and return it. The error
   conditions are the same as with fd_read_hunk.
   To support HTTP/0.9 responses, this function tries to make sure
   that the data begins with "HTTP". If this is not the case, no data
   is read and an empty head is returned, so that the remaining
   data can be treated as body. */
438 fd_read_http_head (int fd)
440 return fd_read_hunk (fd, head_terminator, 512);
444 /* The response data. */
447 /* The array of pointers that indicate where each header starts.
448 For example, given this HTTP response:
455 The headers are located like this:
457 "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
459 headers[0] headers[1] headers[2] headers[3]
I.e. headers[0] points to the beginning of the response,
462 headers[1] points to the end of the first header and the
463 beginning of the second one, etc. */
465 const char **headers;
468 /* Create a new response object from the text of the HTTP response,
469 available in HEAD. That text is automatically split into
470 constituent header lines for fast retrieval using
471 response_header_*. */
473 static struct response *
474 response_new (const char *head)
479 struct response *resp = xnew0 (struct response);
484 /* Empty head means that we're dealing with a headerless
485 (HTTP/0.9) response. In that case, don't set HEADERS at
490 /* Split HEAD into header lines, so that response_header_* functions
491 don't need to do this over and over again. */
497 DO_REALLOC (resp->headers, size, count + 1, const char *);
498 resp->headers[count++] = hdr;
500 /* Break upon encountering an empty line. */
501 if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
504 /* Find the end of HDR, including continuations. */
507 const char *end = strchr (hdr, '\n');
513 while (*hdr == ' ' || *hdr == '\t');
515 DO_REALLOC (resp->headers, size, count + 1, const char *);
516 resp->headers[count++] = NULL;
/* Locate the header named NAME in the response data. If found, set
522 *BEGPTR to its starting, and *ENDPTR to its ending position, and
523 return 1. Otherwise return 0.
525 This function is used as a building block for response_header_copy
526 and response_header_strdup. */
529 response_header_bounds (const struct response *resp, const char *name,
530 const char **begptr, const char **endptr)
533 const char **headers = resp->headers;
536 if (!headers || !headers[1])
539 name_len = strlen (name);
541 for (i = 1; headers[i + 1]; i++)
543 const char *b = headers[i];
544 const char *e = headers[i + 1];
546 && b[name_len] == ':'
547 && 0 == strncasecmp (b, name, name_len))
550 while (b < e && ISSPACE (*b))
552 while (b < e && ISSPACE (e[-1]))
562 /* Copy the response header named NAME to buffer BUF, no longer than
563 BUFSIZE (BUFSIZE includes the terminating 0). If the header
564 exists, 1 is returned, otherwise 0. If there should be no limit on
565 the size of the header, use response_header_strdup instead.
567 If BUFSIZE is 0, no data is copied, but the boolean indication of
568 whether the header is present is still returned. */
571 response_header_copy (const struct response *resp, const char *name,
572 char *buf, int bufsize)
575 if (!response_header_bounds (resp, name, &b, &e))
579 int len = MIN (e - b, bufsize);
580 strncpy (buf, b, len);
586 /* Return the value of header named NAME in RESP, allocated with
587 malloc. If such a header does not exist in RESP, return NULL. */
590 response_header_strdup (const struct response *resp, const char *name)
593 if (!response_header_bounds (resp, name, &b, &e))
595 return strdupdelim (b, e);
598 /* Parse the HTTP status line, which is of format:
600 HTTP-Version SP Status-Code SP Reason-Phrase
602 The function returns the status-code, or -1 if the status line
603 appears malformed. The pointer to "reason-phrase" message is
604 returned in *MESSAGE. */
607 response_status (const struct response *resp, char **message)
614 /* For a HTTP/0.9 response, assume status 200. */
616 *message = xstrdup (_("No headers, assuming HTTP/0.9"));
620 p = resp->headers[0];
621 end = resp->headers[1];
627 if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
631 /* Match the HTTP version. This is optional because Gnutella
632 servers have been reported to not specify HTTP version. */
633 if (p < end && *p == '/')
636 while (p < end && ISDIGIT (*p))
638 if (p < end && *p == '.')
640 while (p < end && ISDIGIT (*p))
644 while (p < end && ISSPACE (*p))
646 if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
649 status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
654 while (p < end && ISSPACE (*p))
656 while (p < end && ISSPACE (end[-1]))
658 *message = strdupdelim (p, end);
664 /* Release the resources used by RESP. */
667 response_free (struct response *resp)
669 xfree_null (resp->headers);
673 /* Print [b, e) to the log, omitting the trailing CRLF. */
676 print_server_response_1 (const char *prefix, const char *b, const char *e)
679 if (b < e && e[-1] == '\n')
681 if (b < e && e[-1] == '\r')
683 BOUNDED_TO_ALLOCA (b, e, ln);
684 logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln);
687 /* Print the server response, line by line, omitting the trailing CR
688 characters, prefixed with PREFIX. */
691 print_server_response (const struct response *resp, const char *prefix)
696 for (i = 0; resp->headers[i + 1]; i++)
697 print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]);
700 /* Parse the `Content-Range' header and extract the information it
701 contains. Returns 1 if successful, -1 otherwise. */
703 parse_content_range (const char *hdr, long *first_byte_ptr,
704 long *last_byte_ptr, long *entity_length_ptr)
708 /* Ancient versions of Netscape proxy server, presumably predating
709 rfc2068, sent out `Content-Range' without the "bytes"
711 if (!strncasecmp (hdr, "bytes", 5))
714 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
718 while (ISSPACE (*hdr))
725 for (num = 0; ISDIGIT (*hdr); hdr++)
726 num = 10 * num + (*hdr - '0');
727 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
729 *first_byte_ptr = num;
731 for (num = 0; ISDIGIT (*hdr); hdr++)
732 num = 10 * num + (*hdr - '0');
733 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
735 *last_byte_ptr = num;
737 for (num = 0; ISDIGIT (*hdr); hdr++)
738 num = 10 * num + (*hdr - '0');
739 *entity_length_ptr = num;
/* Read the body of the response, but don't store it anywhere. This is
744 useful when reading error responses that are not logged anywhere,
745 but which need to be read so the same connection can be reused. */
748 skip_body (int fd, long contlen)
753 /* Skipping the body doesn't make sense if the content length is
754 unknown because, in that case, persistent connections cannot be
755 used. (#### This is not the case with HTTP/1.1 where they can
756 still be used with the magic of the "chunked" transfer!) */
760 oldverbose = opt.verbose;
762 fd_read_body (fd, NULL, contlen, 1, 0, &dummy, NULL);
763 opt.verbose = oldverbose;
766 /* Persistent connections. Currently, we cache the most recently used
767 connection as persistent, provided that the HTTP server agrees to
768 make it such. The persistence data is stored in the variables
769 below. Ideally, it should be possible to cache an arbitrary fixed
770 number of these connections. */
772 /* Whether a persistent connection is active. */
773 static int pconn_active;
776 /* The socket of the connection. */
779 /* Host and port of the currently active persistent connection. */
/* Whether an SSL handshake has occurred on this connection. */
787 /* Mark the persistent connection as invalid and free the resources it
788 uses. This is used by the CLOSE_* macros after they forcefully
789 close a registered persistent connection. */
792 invalidate_persistent (void)
794 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
796 fd_close (pconn.socket);
801 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
802 persistent. This will enable someone to use the same connection
803 later. In the context of HTTP, this must be called only AFTER the
804 response has been received and the server has promised that the
805 connection will remain alive.
807 If a previous connection was persistent, it is closed. */
810 register_persistent (const char *host, int port, int fd, int ssl)
814 if (pconn.socket == fd)
816 /* The connection FD is already registered. */
821 /* The old persistent connection is still active; close it
822 first. This situation arises whenever a persistent
823 connection exists, but we then connect to a different
824 host, and try to register a persistent connection to that
826 invalidate_persistent ();
832 pconn.host = xstrdup (host);
836 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
839 /* Return non-zero if a persistent connection is available for
840 connecting to HOST:PORT. */
843 persistent_available_p (const char *host, int port, int ssl,
844 int *host_lookup_failed)
846 /* First, check whether a persistent connection is active at all. */
850 /* If we want SSL and the last connection wasn't or vice versa,
851 don't use it. Checking for host and port is not enough because
852 HTTP and HTTPS can apparently coexist on the same port. */
853 if (ssl != pconn.ssl)
856 /* If we're not connecting to the same port, we're not interested. */
857 if (port != pconn.port)
860 /* If the host is the same, we're in business. If not, there is
861 still hope -- read below. */
862 if (0 != strcasecmp (host, pconn.host))
864 /* If pconn.socket is already talking to HOST, we needn't
865 reconnect. This happens often when both sites are virtual
866 hosts distinguished only by name and served by the same
867 network interface, and hence the same web server (possibly
868 set up by the ISP and serving many different web sites).
869 This admittedly non-standard optimization does not contradict
870 HTTP and works well with popular server software. */
874 struct address_list *al;
877 /* Don't try to talk to two different SSL sites over the same
878 secure connection! (Besides, it's not clear if name-based
879 virtual hosting is even possible with SSL.) */
882 /* If pconn.socket's peer is one of the IP addresses HOST
883 resolves to, pconn.socket is for all intents and purposes
884 already talking to HOST. */
886 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
888 /* Can't get the peer's address -- something must be very
889 wrong with the connection. */
890 invalidate_persistent ();
893 al = lookup_host (host, 0);
896 *host_lookup_failed = 1;
900 found = address_list_contains (al, &ip);
901 address_list_release (al);
906 /* The persistent connection's peer address was found among the
907 addresses HOST resolved to; therefore, pconn.sock is in fact
908 already talking to HOST -- no need to reconnect. */
911 /* Finally, check whether the connection is still open. This is
important because most servers implement a liberal (short) timeout
913 on persistent connections. Wget can of course always reconnect
914 if the connection doesn't work out, but it's nicer to know in
915 advance. This test is a logical followup of the first test, but
916 is "expensive" and therefore placed at the end of the list. */
918 if (!test_socket_open (pconn.socket))
920 /* Oops, the socket is no longer open. Now that we know that,
921 let's invalidate the persistent connection before returning
923 invalidate_persistent ();
930 /* The idea behind these two CLOSE macros is to distinguish between
931 two cases: one when the job we've been doing is finished, and we
932 want to close the connection and leave, and two when something is
933 seriously wrong and we're closing the connection as part of
936 In case of keep_alive, CLOSE_FINISH should leave the connection
937 open, while CLOSE_INVALIDATE should still close it.
939 Note that the semantics of the flag `keep_alive' is "this
940 connection *will* be reused (the server has promised not to close
941 the connection once we're done)", while the semantics of
`pconn_active && (fd) == pconn.socket' is "we're *now* using an
943 active, registered connection". */
945 #define CLOSE_FINISH(fd) do { \
948 if (pconn_active && (fd) == pconn.socket) \
949 invalidate_persistent (); \
958 #define CLOSE_INVALIDATE(fd) do { \
959 if (pconn_active && (fd) == pconn.socket) \
960 invalidate_persistent (); \
968 long len; /* received length */
969 long contlen; /* expected length */
970 long restval; /* the restart value */
971 int res; /* the result of last read */
972 char *newloc; /* new location (redirection) */
973 char *remote_time; /* remote time-stamp string */
974 char *error; /* textual HTTP error */
975 int statcode; /* status code */
976 double dltime; /* time of the download in msecs */
977 int no_truncate; /* whether truncating the file is
979 const char *referer; /* value of the referer header. */
980 char **local_file; /* local file. */
984 free_hstat (struct http_stat *hs)
986 xfree_null (hs->newloc);
987 xfree_null (hs->remote_time);
988 xfree_null (hs->error);
990 /* Guard against being called twice. */
992 hs->remote_time = NULL;
996 static char *create_authorization_line PARAMS ((const char *, const char *,
997 const char *, const char *,
999 static char *basic_authentication_encode PARAMS ((const char *, const char *));
1000 static int known_authentication_scheme_p PARAMS ((const char *));
1002 time_t http_atotm PARAMS ((const char *));
/* Non-zero if LINE begins with STRING_CONSTANT, compared without
   regard to case, and the match is followed either by whitespace or
   by the end of LINE.  STRING_CONSTANT must be a string literal (or a
   char array), because its length is taken with sizeof.  LINE is
   parenthesized so that pointer expressions such as `p + 1' may be
   passed; it is evaluated more than once.  */
#define BEGINS_WITH(line, string_constant) \
  (!strncasecmp ((line), string_constant, sizeof (string_constant) - 1) \
   && (ISSPACE ((line)[sizeof (string_constant) - 1]) \
       || !(line)[sizeof (string_constant) - 1]))
1009 /* Retrieve a document through HTTP protocol. It recognizes status
1010 code, and correctly handles redirections. It closes the network
1011 socket. If it receives an error from the functions below it, it
1012 will print it if there is enough information to do so (almost
1013 always), returning the error to the caller (i.e. http_loop).
1015 Various HTTP parameters are stored to hs.
1017 If PROXY is non-NULL, the connection will be made to the proxy
1018 server, and u->url will be requested. */
1020 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1022 struct request *req;
1025 char *user, *passwd;
1029 long contlen, contrange;
1035 /* Whether authorization has been already tried. */
1036 int auth_tried_already = 0;
1038 /* Whether our connection to the remote host is through SSL. */
1042 struct response *resp;
1046 /* Whether this connection will be kept alive after the HTTP request
1050 /* Whether keep-alive should be inhibited. */
1051 int inhibit_keep_alive = !opt.http_keep_alive;
1053 /* Headers sent when using POST. */
1054 long post_data_size = 0;
1056 int host_lookup_failed = 0;
1059 if (u->scheme == SCHEME_HTTPS)
1061 /* Initialize the SSL context. After this has once been done,
1062 it becomes a no-op. */
1063 switch (ssl_init ())
1065 case SSLERRCTXCREATE:
1067 logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
1068 return SSLERRCTXCREATE;
1069 case SSLERRCERTFILE:
1070 /* try without certfile */
1071 logprintf (LOG_NOTQUIET,
1072 _("Failed to load certificates from %s\n"),
1074 logprintf (LOG_NOTQUIET,
1075 _("Trying without the specified certificate\n"));
1078 logprintf (LOG_NOTQUIET,
1079 _("Failed to get certificate key from %s\n"),
1081 logprintf (LOG_NOTQUIET,
1082 _("Trying without the specified certificate\n"));
1088 #endif /* HAVE_SSL */
1090 if (!(*dt & HEAD_ONLY))
1091 /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1092 know the local filename so we can save to it. */
1093 assert (*hs->local_file != NULL);
1095 auth_tried_already = 0;
1097 /* Initialize certain elements of struct http_stat. */
1102 hs->remote_time = NULL;
1110 char *proxy_user, *proxy_passwd;
1111 /* For normal username and password, URL components override
1112 command-line/wgetrc parameters. With proxy
1113 authentication, it's the reverse, because proxy URLs are
1114 normally the "permanent" ones, so command-line args
1115 should take precedence. */
1116 if (opt.proxy_user && opt.proxy_passwd)
1118 proxy_user = opt.proxy_user;
1119 proxy_passwd = opt.proxy_passwd;
1123 proxy_user = proxy->user;
1124 proxy_passwd = proxy->passwd;
1126 /* #### This does not appear right. Can't the proxy request,
1127 say, `Digest' authentication? */
1128 if (proxy_user && proxy_passwd)
1129 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1131 /* If we're using a proxy, we will be connecting to the proxy
1136 /* Prepare the request to send. */
1138 req = request_new ();
1140 const char *meth = "GET";
1141 if (*dt & HEAD_ONLY)
1143 else if (opt.post_file_name || opt.post_data)
1145 /* Use the full path, i.e. one that includes the leading slash and
1146 the query string. E.g. if u->path is "foo/bar" and u->query is
1147 "param=value", full_path will be "/foo/bar?param=value". */
1148 request_set_method (req, meth,
1149 proxy ? xstrdup (u->url) : url_full_path (u));
1152 request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1153 if (*dt & SEND_NOCACHE)
1154 request_set_header (req, "Pragma", "no-cache", rel_none);
1156 request_set_header (req, "Range",
1157 aprintf ("bytes=%ld-", hs->restval), rel_value);
1159 request_set_header (req, "User-Agent", opt.useragent, rel_none);
1161 request_set_header (req, "User-Agent",
1162 aprintf ("Wget/%s", version_string), rel_value);
1163 request_set_header (req, "Accept", "*/*", rel_none);
1165 /* Find the username and password for authentication. */
1168 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1169 user = user ? user : opt.http_user;
1170 passwd = passwd ? passwd : opt.http_passwd;
1174 /* We have the username and the password, but haven't tried
1175 any authorization yet. Let's see if the "Basic" method
1176 works. If not, we'll come back here and construct a
1177 proper authorization method with the right challenges.
1179 If we didn't employ this kind of logic, every URL that
1180 requires authorization would have to be processed twice,
1181 which is very suboptimal and generates a bunch of false
1182 "unauthorized" errors in the server log.
1184 #### But this logic also has a serious problem when used
1185 with stronger authentications: we *first* transmit the
1186 username and the password in clear text, and *then* attempt a
1187 stronger authentication scheme. That cannot be right! We
1188 are only fortunate that almost everyone still uses the
1189 `Basic' scheme anyway.
1191 There should be an option to prevent this from happening, for
1192 those who use strong authentication schemes and value their
1194 request_set_header (req, "Authorization",
1195 basic_authentication_encode (user, passwd),
1200 /* Whether we need to print the host header with braces around
1201 host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1202 usual "Host: symbolic-name:1234". */
1203 int squares = strchr (u->host, ':') != NULL;
1204 if (u->port == scheme_default_port (u->scheme))
1205 request_set_header (req, "Host",
1206 aprintf (squares ? "[%s]" : "%s", u->host),
1209 request_set_header (req, "Host",
1210 aprintf (squares ? "[%s]:%d" : "%s:%d",
1215 if (!inhibit_keep_alive)
1216 request_set_header (req, "Connection", "Keep-Alive", rel_none);
1219 request_set_header (req, "Cookie",
1220 cookie_header (wget_cookie_jar,
1221 u->host, u->port, u->path,
1223 u->scheme == SCHEME_HTTPS
1230 if (opt.post_data || opt.post_file_name)
1232 request_set_header (req, "Content-Type",
1233 "application/x-www-form-urlencoded", rel_none);
1235 post_data_size = strlen (opt.post_data);
1238 post_data_size = file_size (opt.post_file_name);
1239 if (post_data_size == -1)
1241 logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
1242 opt.post_file_name);
1246 request_set_header (req, "Content-Length",
1247 aprintf ("Content-Length: %ld", post_data_size),
1251 /* Add the user headers. */
1252 if (opt.user_headers)
1255 for (i = 0; opt.user_headers[i]; i++)
1256 request_set_user_header (req, opt.user_headers[i]);
1260 /* We need to come back here when the initial attempt to retrieve
1261 without authorization header fails. (Expected to happen at least
1262 for the Digest authorization scheme.) */
1266 /* Establish the connection. */
1268 if (!inhibit_keep_alive)
1270 /* Look for a persistent connection to target host, unless a
1271 proxy is used. The exception is when SSL is in use, in which
1272 case the proxy is nothing but a passthrough to the target
1273 host, registered as a connection to the latter. */
1274 struct url *relevant = conn;
1276 if (u->scheme == SCHEME_HTTPS)
1280 if (persistent_available_p (relevant->host, relevant->port,
1282 relevant->scheme == SCHEME_HTTPS,
1286 &host_lookup_failed))
1288 sock = pconn.socket;
1289 using_ssl = pconn.ssl;
1290 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1291 pconn.host, pconn.port);
1292 DEBUGP (("Reusing fd %d.\n", sock));
1298 /* In its current implementation, persistent_available_p will
1299 look up conn->host in some cases. If that lookup failed, we
1300 don't need to bother with connect_to_host. */
1301 if (host_lookup_failed)
1304 sock = connect_to_host (conn->host, conn->port);
1308 return (retryable_socket_connect_error (errno)
1309 ? CONERROR : CONIMPOSSIBLE);
1312 if (proxy && u->scheme == SCHEME_HTTPS)
1314 /* When requesting SSL URLs through proxies, use the
1315 CONNECT method to request passthrough. */
1316 struct request *connreq = request_new ();
1317 request_set_method (connreq, "CONNECT",
1318 aprintf ("%s:%d", u->host, u->port));
1321 request_set_header (connreq, "Proxy-Authorization",
1322 proxyauth, rel_value);
1323 /* Now that PROXYAUTH is part of the CONNECT request,
1324 zero it out so we don't send proxy authorization with
1325 the regular request below. */
1329 write_error = request_send (connreq, sock);
1330 request_free (connreq);
1331 if (write_error < 0)
1333 logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1335 CLOSE_INVALIDATE (sock);
1339 head = fd_read_http_head (sock);
1342 logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1344 CLOSE_INVALIDATE (sock);
1353 DEBUGP (("proxy responded with: [%s]\n", head));
1355 resp = response_new (head);
1356 statcode = response_status (resp, &message);
1357 response_free (resp);
1358 if (statcode != 200)
1361 logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1362 message ? message : "?");
1363 xfree_null (message);
1368 /* SOCK is now *really* connected to u->host, so update CONN
1369 to reflect this. That way register_persistent will
1370 register SOCK as being connected to u->host:u->port. */
1374 if (conn->scheme == SCHEME_HTTPS)
1376 if (!ssl_connect (sock))
1383 #endif /* HAVE_SSL */
1386 /* Send the request to server. */
1387 write_error = request_send (req, sock);
1389 if (write_error >= 0)
1393 DEBUGP (("[POST data: %s]\n", opt.post_data));
1394 write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1396 else if (opt.post_file_name && post_data_size != 0)
1397 write_error = post_file (sock, opt.post_file_name, post_data_size);
1400 if (write_error < 0)
1402 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1404 CLOSE_INVALIDATE (sock);
1408 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1409 proxy ? "Proxy" : "HTTP");
1416 head = fd_read_http_head (sock);
1421 logputs (LOG_NOTQUIET, _("No data received.\n"));
1422 CLOSE_INVALIDATE (sock);
1428 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1430 CLOSE_INVALIDATE (sock);
1435 DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1437 resp = response_new (head);
1439 /* Check for status line. */
1441 statcode = response_status (resp, &message);
1442 if (!opt.server_response)
1443 logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
1446 logprintf (LOG_VERBOSE, "\n");
1447 print_server_response (resp, " ");
1450 if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1451 contlen = strtol (hdrval, NULL, 10);
1453 /* Check for keep-alive related responses. */
1454 if (!inhibit_keep_alive && contlen != -1)
1456 if (response_header_copy (resp, "Keep-Alive", NULL, 0))
1458 else if (response_header_copy (resp, "Connection", hdrval,
1461 if (0 == strcasecmp (hdrval, "Keep-Alive"))
1466 /* The server has promised that it will not close the connection
1467 when we're done. This means that we can register it. */
1468 register_persistent (conn->host, conn->port, sock, using_ssl);
1470 if (statcode == HTTP_STATUS_UNAUTHORIZED)
1472 /* Authorization is required. */
1473 skip_body (sock, contlen);
1474 CLOSE_FINISH (sock);
1475 if (auth_tried_already || !(user && passwd))
1477 /* If we have tried it already, then there is no point
1479 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1483 char *www_authenticate = response_header_strdup (resp,
1484 "WWW-Authenticate");
1485 /* If the authentication scheme is unknown or if it's the
1486 "Basic" authentication (which we try by default), there's
1487 no sense in retrying. */
1488 if (!www_authenticate
1489 || !known_authentication_scheme_p (www_authenticate)
1490 || BEGINS_WITH (www_authenticate, "Basic"))
1492 xfree_null (www_authenticate);
1493 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1498 auth_tried_already = 1;
1499 pth = url_full_path (u);
1500 request_set_header (req, "Authorization",
1501 create_authorization_line (www_authenticate,
1503 request_method (req),
1507 xfree (www_authenticate);
1508 goto retry_with_auth;
1516 hs->statcode = statcode;
1518 hs->error = xstrdup (_("Malformed status line"));
1520 hs->error = xstrdup (_("(no description)"));
1522 hs->error = xstrdup (message);
1524 type = response_header_strdup (resp, "Content-Type");
1527 char *tmp = strchr (type, ';');
1530 while (tmp > type && ISSPACE (tmp[-1]))
1535 hs->newloc = response_header_strdup (resp, "Location");
1536 hs->remote_time = response_header_strdup (resp, "Last-Modified");
1538 char *set_cookie = response_header_strdup (resp, "Set-Cookie");
1541 /* The jar should have been created by now. */
1542 assert (wget_cookie_jar != NULL);
1543 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
1548 if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1550 long first_byte_pos, last_byte_pos, entity_length;
1551 if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1553 contrange = first_byte_pos;
1555 response_free (resp);
1557 /* 20x responses are counted among successful by default. */
1558 if (H_20X (statcode))
1561 /* Return if redirected. */
1562 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1564 /* RFC2068 says that in case of the 300 (multiple choices)
1565 response, the server can output a preferred URL through
1566 `Location' header; otherwise, the request should be treated
1567 like GET. So, if the location is set, it will be a
1568 redirection; otherwise, just proceed normally. */
1569 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1573 logprintf (LOG_VERBOSE,
1574 _("Location: %s%s\n"),
1575 hs->newloc ? hs->newloc : _("unspecified"),
1576 hs->newloc ? _(" [following]") : "");
1578 skip_body (sock, contlen);
1579 CLOSE_FINISH (sock);
1585 /* If content-type is not given, assume text/html. This is because
1586 of the multitude of broken CGI's that "forget" to generate the
1589 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1590 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1595 if (opt.html_extension && (*dt & TEXTHTML))
1596 /* -E / --html-extension / html_extension = on was specified, and this is a
1597 text/html file. If some case-insensitive variation on ".htm[l]" isn't
1598 already the file's suffix, tack on ".html". */
1600 char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
1602 if (last_period_in_local_filename == NULL
1603 || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1604 || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1606 size_t local_filename_len = strlen(*hs->local_file);
1608 *hs->local_file = xrealloc(*hs->local_file,
1609 local_filename_len + sizeof(".html"));
1610 strcpy(*hs->local_file + local_filename_len, ".html");
1612 *dt |= ADDED_HTML_EXTENSION;
1616 if (contrange == 0 && hs->restval > 0)
1618 /* The download starts from the beginning, presumably because
1619 the server did not honor our `Range' request. Normally we'd
1620 just reset hs->restval and start the download from
1623 /* However, if `-c' is used, we need to be a bit more careful:
1625 1. If `-c' is specified and the file already existed when
1626 Wget was started, it would be a bad idea to start downloading
1627 it from scratch, effectively truncating the file.
1629 2. If `-c' is used on a file that is already fully
1630 downloaded, we're requesting bytes after the end of file,
1631 which can result in the server not honoring `Range'. If this
1632 is the case, `Content-Length' will be equal to the length of
1634 if (opt.always_rest)
1636 /* Check for condition #2. */
1637 if (contlen != -1 /* we got content-length. */
1638 && hs->restval >= contlen /* file fully downloaded
1642 logputs (LOG_VERBOSE, _("\
1643 \n The file is already fully retrieved; nothing to do.\n\n"));
1644 /* In case the caller inspects. */
1647 /* Mark as successfully retrieved. */
1650 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1651 might be more bytes in the body. */
1652 return RETRUNNEEDED;
1655 /* Check for condition #1. */
1656 if (hs->no_truncate)
1658 logprintf (LOG_NOTQUIET,
1661 Continued download failed on this file, which conflicts with `-c'.\n\
1662 Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
1664 CLOSE_INVALIDATE (sock); /* see above */
1665 return CONTNOTSUPPORTED;
1673 else if (contrange != hs->restval ||
1674 (H_PARTIAL (statcode) && contrange == -1))
1676 /* This means the whole request was somehow misunderstood by the
1677 server. Bail out. */
1679 CLOSE_INVALIDATE (sock);
1682 hs->contlen = contlen + contrange;
1688 /* No need to print this output if the body won't be
1689 downloaded at all, or if the original server response is
1691 logputs (LOG_VERBOSE, _("Length: "));
1694 logputs (LOG_VERBOSE, legible (contlen + contrange));
1696 logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen));
1699 logputs (LOG_VERBOSE,
1700 opt.ignore_length ? _("ignored") : _("unspecified"));
1702 logprintf (LOG_VERBOSE, " [%s]\n", type);
1704 logputs (LOG_VERBOSE, "\n");
1708 type = NULL; /* We don't need it any more. */
1710 /* Return if we have no intention of further downloading. */
1711 if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1713 /* In case the caller cares to look... */
1717 /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
1718 servers not to send body in response to a HEAD request. If
1719 you encounter such a server (more likely a broken CGI), use
1720 `--no-http-keep-alive'. */
1721 CLOSE_FINISH (sock);
1722 return RETRFINISHED;
1725 /* Open the local file. */
1728 mkalldirs (*hs->local_file);
1730 rotate_backups (*hs->local_file);
1731 fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
1734 logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1735 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1736 might be more bytes in the body. */
1742 extern int global_download_count;
1744 /* To ensure that repeated "from scratch" downloads work for -O
1745 files, we rewind the file pointer, unless restval is
1746 non-zero. (This works only when -O is used on regular files,
1747 but it's still a valuable feature.)
1749 However, this loses when more than one URL is specified on
1750 the command line: the second rewind eradicates the contents
1751 of the first download. Thus we disable the above trick for
1752 all the downloads except the very first one.
1754 #### A possible solution to this would be to remember the
1755 file position in the output document and to seek to that
1756 position, instead of rewinding.
1758 We don't truncate stdout, since that breaks
1759 "wget -O - [...] >> foo".
1761 if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
1763 /* This will silently fail for streams that don't correspond
1764 to regular files, but that's OK. */
1766 /* ftruncate is needed because opt.dfp is opened in append
1767 mode if opt.always_rest is set. */
1768 ftruncate (fileno (fp), 0);
1773 /* #### This confuses the code that checks for file size. There
1774 should be some overhead information. */
1775 if (opt.save_headers)
1776 fwrite (head, 1, strlen (head), fp);
1778 /* Download the request body. */
1779 hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, keep_alive,
1780 hs->restval, &hs->len, &hs->dltime);
1781 hs->len += contrange;
1784 CLOSE_FINISH (sock);
1786 CLOSE_INVALIDATE (sock);
1789 /* Close or flush the file. We have to be careful to check for
1790 error here. Checking the result of fwrite() is not enough --
1791 errors could go unnoticed! */
1794 flush_res = fclose (fp);
1796 flush_res = fflush (fp);
1797 if (flush_res == EOF)
1802 return RETRFINISHED;
1805 /* The genuine HTTP loop! This is the part where the retrieval is
1806 retried, and retried, and retried, and... */
/* Works out the local file name for U, applies the no-clobber and
   time-stamping checks, and then calls gethttp() once per pass,
   logging the outcome of each attempt.  Passes repeat while the try
   limit allows it (see the loop condition at the bottom).

   U          - the URL to retrieve.
   NEWLOC     - receives an xstrdup'ed copy of hstat.newloc, the
                location reported by gethttp().
   LOCAL_FILE - if both LOCAL_FILE and *LOCAL_FILE are non-NULL, that
                name is used as is; if only LOCAL_FILE is non-NULL,
                *LOCAL_FILE is set from url_file_name (U); if it is
                NULL, a private name from url_file_name (U) is used.
   REFERER    - stored in hstat.referer, which is handed to gethttp().
   DT         - flag word shared with gethttp(); it is reset to
                ACCEPTRANGES here and SEND_NOCACHE is set or cleared
                on each pass.
   PROXY      - passed through to gethttp(); when non-NULL, passes
                after the first one send the no-cache directive.

   NOTE(review): the return statements of this function are not
   visible in this part of the file; confirm the returned status
   codes against the callers.  */
1808 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1809 int *dt, struct url *proxy)
1812 int use_ts, got_head = 0; /* time-stamping info */
1813 char *filename_plus_orig_suffix;
1814 char *local_filename = NULL;
1815 char *tms, *locf, *tmrate;
1817 time_t tml = -1, tmr = -1; /* local and remote time-stamps */
1818 long local_size = 0; /* the size of the local file */
1819 size_t filename_len;
1820 struct http_stat hstat; /* HTTP status */
1824 /* This used to be done in main(), but it's a better idea to do it
1825 here so that we don't go through the hoops if we're just using
1829 if (!wget_cookie_jar)
1830 wget_cookie_jar = cookie_jar_new ();
1831 if (opt.cookies_input && !cookies_loaded_p)
1833 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1834 cookies_loaded_p = 1;
1840 /* Warn on (likely bogus) wildcard usage in HTTP. Don't use
1841 has_wildcards_p because it would also warn on `?', and we know that
1842 shows up in CGI paths a *lot*. */
1843 if (strchr (u->url, '*'))
1844 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1846 /* Determine the local filename. */
1847 if (local_file && *local_file)
1848 hstat.local_file = local_file;
1849 else if (local_file)
1851 *local_file = url_file_name (u);
1852 hstat.local_file = local_file;
1856 dummy = url_file_name (u);
1857 hstat.local_file = &dummy;
1860 if (!opt.output_document)
1861 locf = *hstat.local_file;
1863 locf = opt.output_document;
1865 hstat.referer = referer;
/* sizeof (".orig") counts the terminating NUL, so the alloca'ed
   buffer has room for the file name plus the suffix.  */
1867 filename_len = strlen (*hstat.local_file);
1868 filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1870 if (opt.noclobber && file_exists_p (*hstat.local_file))
1872 /* If opt.noclobber is turned on and file already exists, do not
1873 retrieve the file */
1874 logprintf (LOG_VERBOSE, _("\
1875 File `%s' already there, will not retrieve.\n"), *hstat.local_file);
1876 /* If the file is there, we suppose it's retrieved OK. */
1879 /* #### Bogusness alert. */
1880 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1881 if (has_html_suffix_p (*hstat.local_file))
1889 if (opt.timestamping)
1891 int local_dot_orig_file_exists = 0;
1893 if (opt.backup_converted)
1894 /* If -K is specified, we'll act on the assumption that it was specified
1895 last time these files were downloaded as well, and instead of just
1896 comparing local file X against server file X, we'll compare local
1897 file X.orig (if extant, else X) against server file X. If -K
1898 _wasn't_ specified last time, or the server contains files called
1899 *.orig, -N will be back to not operating correctly with -k. */
1901 /* Would a single s[n]printf() call be faster? --dan
1903 Definitely not. sprintf() is horribly slow. It's a
1904 different question whether the difference between the two
1905 affects a program. Usually I'd say "no", but at one
1906 point I profiled Wget, and found that a measurable and
1907 non-negligible amount of time was lost calling sprintf()
1908 in url.c. Replacing sprintf with inline calls to
1909 strcpy() and long_to_string() made a difference.
1911 memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
1912 memcpy (filename_plus_orig_suffix + filename_len,
1913 ".orig", sizeof (".orig"));
1915 /* Try to stat() the .orig file. */
1916 if (stat (filename_plus_orig_suffix, &st) == 0)
1918 local_dot_orig_file_exists = 1;
1919 local_filename = filename_plus_orig_suffix;
1923 if (!local_dot_orig_file_exists)
1924 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1925 if (stat (*hstat.local_file, &st) == 0)
1926 local_filename = *hstat.local_file;
1928 if (local_filename != NULL)
1929 /* There was a local file, so we'll check later to see if the version
1930 the server has is the same version we already have, allowing us to
1936 /* Modification time granularity is 2 seconds for Windows, so
1937 increase local time by 1 second for later comparison. */
1940 local_size = st.st_size;
1944 /* Reset the counter. */
1946 *dt = 0 | ACCEPTRANGES;
1950 /* Increment the pass counter. */
1952 sleep_between_retrievals (count);
1953 /* Get the current time string. */
1954 tms = time_str (NULL);
1955 /* Print fetch message, if opt.verbose. */
1958 char *hurl = url_string (u, 1);
1962 sprintf (tmp, _("(try:%2d)"), count);
1963 logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
1964 tms, hurl, tmp, locf);
1966 ws_changetitle (hurl, 1);
1971 /* Default document type is empty. However, if spider mode is
1972 on or time-stamping is employed, HEAD_ONLY commands is
1973 encoded within *dt. */
1974 if (opt.spider || (use_ts && !got_head))
1978 /* Assume no restarting. */
1980 /* Decide whether or not to restart. */
1981 if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
1982 /* #### this calls access() and then stat(); could be optimized. */
1983 && file_exists_p (locf))
1984 if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
1985 hstat.restval = st.st_size;
1987 /* If `-c' is used and the file is existing and non-empty,
1988 refuse to truncate it if the server doesn't support continued
1990 hstat.no_truncate = 0;
1991 if (opt.always_rest && hstat.restval)
1992 hstat.no_truncate = 1;
1994 /* Decide whether to send the no-cache directive. We send it in
1996 a) we're using a proxy, and we're past our first retrieval.
1997 Some proxies are notorious for caching incomplete data, so
1998 we require a fresh get.
1999 b) caching is explicitly inhibited. */
2000 if ((proxy && count > 1) /* a */
2001 || !opt.allow_cache /* b */
2003 *dt |= SEND_NOCACHE;
2005 *dt &= ~SEND_NOCACHE;
2007 /* Try fetching the document, or at least its head. */
2008 err = gethttp (u, &hstat, dt, proxy);
2010 /* It's unfortunate that wget determines the local filename before finding
2011 out the Content-Type of the file. Barring a major restructuring of the
2012 code, we need to re-set locf here, since gethttp() may have xrealloc()d
2013 *hstat.local_file to tack on ".html". */
2014 if (!opt.output_document)
2015 locf = *hstat.local_file;
2017 locf = opt.output_document;
2020 tms = time_str (NULL);
2021 /* Get the new location (with or without the redirection). */
2023 *newloc = xstrdup (hstat.newloc);
2026 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2027 case CONERROR: case READERR: case WRITEFAILED:
2029 /* Non-fatal errors continue executing the loop, which will
2030 bring them to "while" statement at the end, to judge
2031 whether the number of tries was exceeded. */
2032 free_hstat (&hstat);
2033 printwhat (count, opt.ntry);
2036 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2037 case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
2038 /* Fatal errors just return from the function. */
2039 free_hstat (&hstat);
2043 case FWRITEERR: case FOPENERR:
2044 /* Another fatal error. */
2045 logputs (LOG_VERBOSE, "\n");
2046 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2047 *hstat.local_file, strerror (errno));
2048 free_hstat (&hstat);
2053 /* Another fatal error. */
2054 logputs (LOG_VERBOSE, "\n");
2055 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2056 free_hstat (&hstat);
2061 /* Return the new location to the caller. */
2064 logprintf (LOG_NOTQUIET,
2065 _("ERROR: Redirection (%d) without location.\n"),
2067 free_hstat (&hstat);
2071 free_hstat (&hstat);
2076 /* The file was already fully retrieved. */
2077 free_hstat (&hstat);
2082 /* Deal with you later. */
2085 /* All possibilities should have been exhausted. */
2088 if (!(*dt & RETROKF))
2092 /* #### Ugly ugly ugly! */
2093 char *hurl = url_string (u, 1);
2094 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2097 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2098 tms, hstat.statcode, hstat.error);
2099 logputs (LOG_VERBOSE, "\n");
2100 free_hstat (&hstat);
2105 /* Did we get the time-stamp? */
2108 if (opt.timestamping && !hstat.remote_time)
2110 logputs (LOG_NOTQUIET, _("\
2111 Last-modified header missing -- time-stamps turned off.\n"));
2113 else if (hstat.remote_time)
2115 /* Convert the date-string into struct tm. */
2116 tmr = http_atotm (hstat.remote_time);
2117 if (tmr == (time_t) (-1))
2118 logputs (LOG_VERBOSE, _("\
2119 Last-modified header invalid -- time-stamp ignored.\n"));
2123 /* The time-stamping section. */
2128 use_ts = 0; /* no more time-stamping */
2129 count = 0; /* the retrieve count for HEAD is
2131 if (hstat.remote_time && tmr != (time_t) (-1))
2133 /* Now time-stamping can be used validly. Time-stamping
2134 means that if the sizes of the local and remote file
2135 match, and local file is newer than the remote file,
2136 it will not be retrieved. Otherwise, the normal
2137 download procedure is resumed. */
2139 (hstat.contlen == -1 || local_size == hstat.contlen))
2141 logprintf (LOG_VERBOSE, _("\
2142 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2144 free_hstat (&hstat);
2148 else if (tml >= tmr)
2149 logprintf (LOG_VERBOSE, _("\
2150 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
2152 logputs (LOG_VERBOSE,
2153 _("Remote file is newer, retrieving.\n"));
2155 free_hstat (&hstat);
2158 if ((tmr != (time_t) (-1))
2160 && ((hstat.len == hstat.contlen) ||
2161 ((hstat.res == 0) &&
2162 ((hstat.contlen == -1) ||
2163 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2165 /* #### This code repeats in http.c and ftp.c. Move it to a
2167 const char *fl = NULL;
2168 if (opt.output_document)
2170 if (opt.od_known_regular)
2171 fl = opt.output_document;
2174 fl = *hstat.local_file;
2178 /* End of time-stamping section. */
2182 logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
/* The rate string is built from the byte count past the restart
   offset (len - restval) together with the download time.  */
2187 tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
2189 if (hstat.len == hstat.contlen)
2193 logprintf (LOG_VERBOSE,
2194 _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
2195 tms, tmrate, locf, hstat.len, hstat.contlen);
2196 logprintf (LOG_NONVERBOSE,
2197 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2198 tms, u->url, hstat.len, hstat.contlen, locf, count);
2201 total_downloaded_bytes += hstat.len;
2203 /* Remember that we downloaded the file for later ".orig" code. */
2204 if (*dt & ADDED_HTML_EXTENSION)
2205 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2207 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2209 free_hstat (&hstat);
2213 else if (hstat.res == 0) /* No read error */
2215 if (hstat.contlen == -1) /* We don't know how much we were supposed
2216 to get, so assume we succeeded. */
2220 logprintf (LOG_VERBOSE,
2221 _("%s (%s) - `%s' saved [%ld]\n\n"),
2222 tms, tmrate, locf, hstat.len);
2223 logprintf (LOG_NONVERBOSE,
2224 "%s URL:%s [%ld] -> \"%s\" [%d]\n",
2225 tms, u->url, hstat.len, locf, count);
2228 total_downloaded_bytes += hstat.len;
2230 /* Remember that we downloaded the file for later ".orig" code. */
2231 if (*dt & ADDED_HTML_EXTENSION)
2232 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2234 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2236 free_hstat (&hstat);
2240 else if (hstat.len < hstat.contlen) /* meaning we lost the
2241 connection too soon */
2243 logprintf (LOG_VERBOSE,
2244 _("%s (%s) - Connection closed at byte %ld. "),
2245 tms, tmrate, hstat.len);
2246 printwhat (count, opt.ntry);
2247 free_hstat (&hstat);
2250 else if (!opt.kill_longer) /* meaning we got more than expected */
2252 logprintf (LOG_VERBOSE,
2253 _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
2254 tms, tmrate, locf, hstat.len, hstat.contlen);
2255 logprintf (LOG_NONVERBOSE,
2256 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2257 tms, u->url, hstat.len, hstat.contlen, locf, count);
2259 total_downloaded_bytes += hstat.len;
2261 /* Remember that we downloaded the file for later ".orig" code. */
2262 if (*dt & ADDED_HTML_EXTENSION)
2263 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2265 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2267 free_hstat (&hstat);
2271 else /* the same, but not accepted */
2273 logprintf (LOG_VERBOSE,
2274 _("%s (%s) - Connection closed at byte %ld/%ld. "),
2275 tms, tmrate, hstat.len, hstat.contlen);
2276 printwhat (count, opt.ntry);
2277 free_hstat (&hstat);
2281 else /* now hstat.res can only be -1 */
2283 if (hstat.contlen == -1)
2285 logprintf (LOG_VERBOSE,
2286 _("%s (%s) - Read error at byte %ld (%s)."),
2287 tms, tmrate, hstat.len, strerror (errno));
2288 printwhat (count, opt.ntry);
2289 free_hstat (&hstat);
2292 else /* hstat.res == -1 and contlen is given */
2294 logprintf (LOG_VERBOSE,
2295 _("%s (%s) - Read error at byte %ld/%ld (%s). "),
2296 tms, tmrate, hstat.len, hstat.contlen,
2298 printwhat (count, opt.ntry);
2299 free_hstat (&hstat);
/* Go around again unless the try limit has been reached; an
   opt.ntry of 0 means there is no limit.  */
2306 while (!opt.ntry || (count < opt.ntry));
2310 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
2311 than local timezone.
2313 mktime is similar but assumes struct tm, also known as the
2314 "broken-down" form of time, is in local time zone. mktime_from_utc
2315 uses mktime to make the conversion understanding that an offset
2316 will be introduced by the local time assumption.
2318 mktime_from_utc then measures the introduced offset by applying
2319 gmtime to the initial result and applying mktime to the resulting
2320 "broken-down" form. The difference between the two mktime results
2321 is the measured offset which is then subtracted from the initial
2322 mktime result to yield a calendar time which is the value returned.
2324 tm_isdst in struct tm is set to 0 to force mktime to introduce a
2325 consistent offset (the non DST offset) since tm and tm+o might be
2326 on opposite sides of a DST change.
2328 Some implementations of mktime return -1 for the nonexistent
2329 localtime hour at the beginning of DST. In this event, use
2330 mktime(tm - 1hr) + 3600.
mktime(tm) --> t+o
2334 gmtime(t+o) --> tm+o
2335 mktime(tm+o) --> t+2o
2336 t+o - (t+2o - t+o) = t
2338 Note that glibc contains a function of the same purpose named
2339 `timegm' (reverse of gmtime). But obviously, it is not universally
2340 available, and unfortunately it is not straightforwardly
2341 extractable for use here. Perhaps configure should detect timegm
2342 and use it where available.
2344 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
2345 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
2346 Further improved by Roger with assistance from Edward J. Sabol
2347 based on input by Jamie Zawinski. */
2350 mktime_from_utc (struct tm *t)
/* Failure exit for the first conversion: -1 is reported to the
   caller.  */
2361 return -1; /* can't deal with output from strptime */
/* Failure exit for the second conversion (the one applied to the
   gmtime result): again -1 is reported.  */
2372 return -1; /* can't deal with output from gmtime */
/* (tb - tl) is the offset between the two conversion results;
   subtracting it from tl cancels the local-time assumption, as laid
   out in the comment preceding this function.  NOTE(review): the
   assignments to tl and tb are not visible here -- presumably they
   hold the first and second mktime results; confirm.  */
2375 return (tl - (tb - tl));
2378 /* Check whether the result of strptime() indicates success.
2379 strptime() returns the pointer to how far it got to in the string.
2380 The processing has been successful if the string is at `GMT' or
2381 `+X', or at the end of the string.
2383 In extended regexp parlance, the function returns 1 if P matches
2384 "^ *(GMT|[+-][0-9]|$)", 0 otherwise. P being NULL (which strptime
2385 can return) is considered a failure and 0 is returned. */
2387 check_end (const char *p)
/* Step over any whitespace that follows the part already parsed.  */
2391 while (ISSPACE (*p))
/* Accept the literal zone name `GMT'...  */
2394 || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
/* ...or a sign followed by a digit, i.e. the start of a numeric
   offset.  */
2395 || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2401 /* Convert the textual specification of time in TIME_STRING to the
2402 number of seconds since the Epoch.
2404 TIME_STRING can be in any of the three formats RFC2068 allows the
2405 HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date.
2406 Timezones are ignored, and should be GMT.
2408 Return the computed time_t representation, or -1 if the conversion fails.
2411 This function uses strptime with various string formats for parsing
2412 TIME_STRING. This results in a parser that is not as lenient in
2413 interpreting TIME_STRING as I would like it to be. Being based on
2414 strptime, it always allows shortened months, one-digit days, etc.,
2415 but due to the multitude of formats in which time can be
2416 represented, an ideal HTTP time parser would be even more
2417 forgiving. It should completely ignore things like week days and
2418 concentrate only on the various forms of representing years,
2419 months, days, hours, minutes, and seconds. For example, it would
2420 be nice if it accepted ISO 8601 out of the box.
2422 I've investigated free and PD code for this purpose, but none was
2423 usable. getdate was big and unwieldy, and had potential copyright
2424 issues, or so I was informed. Dr. Marcus Hennecke's atotm(),
2425 distributed with phttpd, is excellent, but we cannot use it because
2426 it is not assigned to the FSF. So I stuck with strptime. */
2429 http_atotm (const char *time_string)
2431 /* NOTE: Solaris strptime man page claims that %n and %t match white
2432 space, but that's not universally available. Instead, we simply
2433 use ` ' to mean "skip all WS", which works under all strptime
2434 implementations I've tested. */
/* Candidate strptime formats; the loop at the end of the function
   tries them in array order.  */
2436 static const char *time_formats[] = {
2437 "%a, %d %b %Y %T", /* RFC1123: Thu, 29 Jan 1998 22:12:57 */
2438 "%A, %d-%b-%y %T", /* RFC850: Thursday, 29-Jan-98 22:12:57 */
2439 "%a, %d-%b-%Y %T", /* pseudo-RFC850: Thu, 29-Jan-1998 22:12:57
2440 (google.com uses this for their cookies.) */
2441 "%a %b %d %T %Y" /* asctime: Thu Jan 29 22:12:57 1998 */
2447 /* According to Roger Beeman, we need to initialize tm_isdst, since
2448 strptime won't do it. */
2451 /* Note that under foreign locales Solaris strptime() fails to
2452 recognize English dates, which renders this function useless. We
2453 solve this by being careful not to affect LC_TIME when
2454 initializing locale.
2456 Another solution would be to temporarily set locale to C, invoke
2457 strptime(), and restore it back. This is slow and dirty,
2458 however, and locale support other than LC_MESSAGES can mess other
2459 things, so I rather chose to stick with just setting LC_MESSAGES.
2461 GNU strptime does not have this problem because it recognizes
2462 both international and local dates. */
/* The first format that strptime parses up to a point check_end
   accepts wins; its broken-down time is converted with
   mktime_from_utc and returned immediately.  */
2464 for (i = 0; i < countof (time_formats); i++)
2465 if (check_end (strptime (time_string, time_formats[i], &t)))
2466 return mktime_from_utc (&t);
2468 /* All formats have failed. */
2472 /* Authorization support: We support two authorization schemes:
2474 * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
2476 * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
2477 consisting of answering to the server's challenge with the proper
2480 /* How many bytes it will take to store LEN bytes in base64. */
/* Every group of up to three input bytes yields four output
   characters; the "+ 2" makes the integer division round a partial
   final group up.  The result does not include room for a
   terminating '\0'.  */
2481 #define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))
/* Encode the LENGTH bytes starting at S to base64 format and place
   the result in STORE.  STORE will be 0-terminated, and must point to
   a writable buffer of at least 1+BASE64_LENGTH(length) bytes.

   The input is read as unsigned bytes, so values >= 0x80 index the
   conversion table correctly even where plain `char' is signed.  No
   byte beyond S[LENGTH-1] is ever read: a final group of one or two
   bytes is encoded with its missing bits taken as zero and the output
   is padded with `=' to a multiple of four characters.  A LENGTH of
   zero (or less) produces the empty string.  */
static void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  const unsigned char *in = (const unsigned char *) s;
  char *out = store;
  int i;

  /* Transform each complete 3x8-bit group to 4x6 bits, as required
     by base64.  */
  for (i = 0; i + 2 < length; i += 3)
    {
      *out++ = tbl[in[i] >> 2];
      *out++ = tbl[((in[i] & 3) << 4) | (in[i + 1] >> 4)];
      *out++ = tbl[((in[i + 1] & 0xf) << 2) | (in[i + 2] >> 6)];
      *out++ = tbl[in[i + 2] & 0x3f];
    }

  /* Encode the one or two leftover bytes, if any, and pad.  Only the
     bytes that really exist are read.  */
  if (length - i == 1)
    {
      *out++ = tbl[in[i] >> 2];
      *out++ = tbl[(in[i] & 3) << 4];
      *out++ = '=';
      *out++ = '=';
    }
  else if (length - i == 2)
    {
      *out++ = tbl[in[i] >> 2];
      *out++ = tbl[((in[i] & 3) << 4) | (in[i + 1] >> 4)];
      *out++ = tbl[(in[i + 1] & 0xf) << 2];
      *out++ = '=';
    }

  /* ...and zero-terminate it.  */
  *out = '\0';
}
2521 /* Create the authentication header contents for the `Basic' scheme.
2522 This is done by encoding the string `USER:PASS' in base64 and
2523 prepending `Basic ' to it. */
2525 basic_authentication_encode (const char *user, const char *passwd)
2527 char *t1, *t2, *res;
/* Length of the "USER:PASSWD" text: both strings plus one byte for
   the colon.  */
2528 int len1 = strlen (user) + 1 + strlen (passwd);
/* Length of the base64 form of LEN1 input bytes.  */
2529 int len2 = BASE64_LENGTH (len1);
/* alloca'ed scratch buffer for the plain "USER:PASSWD" text; the
   extra byte is for the terminating NUL written by sprintf.  */
2531 t1 = (char *)alloca (len1 + 1);
2532 sprintf (t1, "%s:%s", user, passwd);
/* alloca'ed scratch buffer for the encoded text, again with one
   extra byte for the terminator.  */
2534 t2 = (char *)alloca (len2 + 1);
2535 base64_encode (t1, t2, len1);
/* Result buffer from xmalloc: 6 bytes for "Basic ", LEN2 for the
   encoded credentials and 1 for the terminating NUL.  */
2537 res = (char *)xmalloc (6 + len2 + 1);
2538 sprintf (res, "Basic %s", t2);
/* Statement-style macro (wrapped in `do {') that loops for as long as
   the character X points at is whitespace.  NOTE(review): the loop
   body is not visible here -- presumably it advances X; confirm.  */
2543 #define SKIP_WS(x) do { \
2544 while (ISSPACE (*(x))) \
2549 /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
2550 of a field in such a header. If the field is the one specified by
2551 ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
2552 digest authorization code), extract its value in the (char*)
2553 variable pointed by RET. Returns negative on a malformed header,
2554 or number of bytes that have been parsed by this call. */
2556 extract_header_attr (const char *au, const char *attr_name, char **ret)
2558 const char *cp, *ep;
/* Prefix comparison: proceed only when the text at CP begins with
   ATTR_NAME.  */
2562 if (strncmp (cp, attr_name, strlen (attr_name)) == 0)
/* Step past the attribute name.  */
2564 cp += strlen (attr_name);
/* Advance EP to the next double quote, or to the end of the string
   if there is none.  */
2577 for (ep = cp; *ep && *ep != '\"'; ep++)
/* Hand the text between CP and EP back through *RET.  NOTE(review):
   strdupdelim presumably returns a fresh copy of [CP, EP) -- confirm
   against its definition.  */
2582 *ret = strdupdelim (cp, ep);
2589 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2590 an array of 16 bytes containing the hash keys, and BUF should be a
2591 buffer of 33 writable characters (32 for hex digits plus one for
2592 zero termination). */
2594 dump_hash (unsigned char *buf, const unsigned char *hash)
/* Walk the MD5_HASHLEN hash bytes, writing two characters to BUF for
   each of them.  */
2598 for (i = 0; i < MD5_HASHLEN; i++, hash++)
/* High four bits of the byte first...  */
2600 *buf++ = XNUM_TO_digit (*hash >> 4);
/* ...then the low four bits.  */
2601 *buf++ = XNUM_TO_digit (*hash & 0xf);
2606 /* Take the line apart to find the challenge, and compose a digest
2607 authorization header. See RFC2069 section 2.1.2. */
2609 digest_authentication_encode (const char *au, const char *user,
2610 const char *passwd, const char *method,
/* NOTE(review): these three pointers have static storage, so every call
   to this function shares them; they are reset to NULL below before
   the challenge is parsed.  */
2613 static char *realm, *opaque, *nonce;
/* Entries of the OPTIONS table: each pairs a challenge attribute name
   with the address of the variable that receives its value.  Only two
   entries are visible in this excerpt.  */
2618 { "realm", &realm },
2619 { "opaque", &opaque },
/* Begin each call with no attribute values recorded.  */
2624 realm = opaque = nonce = NULL;
2626 au += 6; /* skip over `Digest' */
/* Offer the current position in AU to extract_header_attr once for each
   entry of OPTIONS.  */
2632 for (i = 0; i < countof (options); i++)
2634 int skip = extract_header_attr (au, options[i].name,
2635 options[i].variable);
/* Releases OPAQUE if one has been stored.  The condition guarding this
   call is not visible here; presumably it is the negative (malformed
   header) result of extract_header_attr -- TODO confirm.  */
2639 xfree_null (opaque);
/* i equals countof (options) when the loop above ran through every
   entry; presumably that means no known attribute name matched and the
   field is to be skipped -- TODO confirm.  The three loops below stop
   at '=', then at a double quote, then at ',', or at the end of the
   string; their bodies are not visible in this excerpt.  */
2649 if (i == countof (options))
2651 while (*au && *au != '=')
2659 while (*au && *au != '\"')
2666 while (*au && *au != ',')
/* True when REALM or NONCE was not obtained from the challenge, or when
   any of USER, PASSWD, PATH, METHOD is NULL.  OPAQUE is not tested
   here.  */
2671 if (!realm || !nonce || !user || !passwd || !path || !method)
2674 xfree_null (opaque);
2679 /* Calculate the digest value. */
2681 ALLOCA_MD5_CONTEXT (ctx);
/* HASH holds one raw digest at a time; each of the three text buffers
   holds a hex-encoded digest: two characters per hash byte plus one
   byte for the terminator.  */
2682 unsigned char hash[MD5_HASHLEN];
2683 unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2684 unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2686 /* A1BUF = H(user ":" realm ":" password) */
2688 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2689 gen_md5_update ((unsigned char *)":", 1, ctx);
2690 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2691 gen_md5_update ((unsigned char *)":", 1, ctx);
2692 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2693 gen_md5_finish (ctx, hash);
2694 dump_hash (a1buf, hash);
2696 /* A2BUF = H(method ":" path) */
2698 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2699 gen_md5_update ((unsigned char *)":", 1, ctx);
2700 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2701 gen_md5_finish (ctx, hash);
2702 dump_hash (a2buf, hash);
2704 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
/* Only the MD5_HASHLEN * 2 hex characters of A1BUF and A2BUF are fed to
   the hash, not the byte that follows them.  */
2706 gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2707 gen_md5_update ((unsigned char *)":", 1, ctx);
2708 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2709 gen_md5_update ((unsigned char *)":", 1, ctx);
2710 gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2711 gen_md5_finish (ctx, hash);
2712 dump_hash (response_digest, hash);
/* The allocation size is a sum of component lengths.  Only some terms
   are visible here: USER, the hex response digest, and OPAQUE, which
   contributes only when it is non-NULL.  */
2714 res = (char*) xmalloc (strlen (user)
2719 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2720 + (opaque ? strlen (opaque) : 0)
/* The format string below is split with a backslash-newline inside the
   literal, so the next two lines must stay adjacent and any leading
   whitespace on the second one becomes part of the output.  */
2722 sprintf (res, "Digest \
2723 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2724 user, realm, nonce, path, response_digest);
/* P points at the current end of RES, so the strcat appends there
   without rescanning RES from its start.  The guard for this optional
   part (presumably a test of OPAQUE) is not visible in this excerpt.  */
2727 char *p = res + strlen (res);
2728 strcat (p, ", opaque=\"");
2735 #endif /* USE_DIGEST */
/* BEGINS_WITH (line, string_constant): non-zero when LINE starts with
   STRING_CONSTANT, compared case-insensitively, and the character right
   after the match is whitespace or the end of the string.
   STRING_CONSTANT must be a string literal (or char array), because its
   length is taken with sizeof.  LINE appears several times in the
   expansion and is not parenthesized, so pass a simple expression
   without side effects.  */
2738 #define BEGINS_WITH(line, string_constant) \
2739 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
2740 && (ISSPACE (line[sizeof (string_constant) - 1]) \
2741 || !line[sizeof (string_constant) - 1]))
/* Returns non-zero when AU begins with one of the scheme names "Basic",
   "Digest" or "NTLM", using the BEGINS_WITH test (case-insensitive,
   name followed by whitespace or the end of the string).
   NOTE(review): no preprocessor conditional is visible around the
   "Digest" test here -- confirm this is intended for builds without
   USE_DIGEST.  */
2744 known_authentication_scheme_p (const char *au)
2746 return BEGINS_WITH (au, "Basic")
2747 || BEGINS_WITH (au, "Digest")
2748 || BEGINS_WITH (au, "NTLM");
2753 /* Create the HTTP authorization request header. AU is the value of
2754 the `WWW-Authenticate' response header; the request header produced
2755 is the one appropriate for the authorization scheme AU names
2756 (`Basic' and `Digest' are supported by the current
2757 implementation). */
2759 create_authorization_line (const char *au, const char *user,
2760 const char *passwd, const char *method,
2763 if (0 == strncasecmp (au, "Basic", 5))
2764 return basic_authentication_encode (user, passwd);
2766 if (0 == strncasecmp (au, "Digest", 6))
2767 return digest_authentication_encode (au, user, passwd, method, path);
2768 #endif /* USE_DIGEST */