2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
35 #include <sys/types.h>
46 #if TIME_WITH_SYS_TIME
47 # include <sys/time.h>
51 # include <sys/time.h>
68 # include "gen_sslfunc.h"
76 extern char *version_string;
77 extern LARGE_INT total_downloaded_bytes;
80 # define MIN(x, y) ((x) > (y) ? (y) : (x))
84 static int cookies_loaded_p;
85 struct cookie_jar *wget_cookie_jar;
87 #define TEXTHTML_S "text/html"
88 #define TEXTXHTML_S "application/xhtml+xml"
90 /* Some status code validation macros: */
/* H_20X: non-zero for any status code in the 2xx (success) range. */
91 #define H_20X(x) (((x) >= 200) && ((x) < 300))
/* H_PARTIAL: non-zero only for HTTP_STATUS_PARTIAL_CONTENTS (206). */
92 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
/* H_REDIRECTED: non-zero for the redirections 301, 302 and 307.  Note
   that 300 (multiple choices) and 304 (not modified) are deliberately
   not matched by this test. */
93 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
94 || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
95 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
97 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
99 #define HTTP_STATUS_OK 200
100 #define HTTP_STATUS_CREATED 201
101 #define HTTP_STATUS_ACCEPTED 202
102 #define HTTP_STATUS_NO_CONTENT 204
103 #define HTTP_STATUS_PARTIAL_CONTENTS 206
105 /* Redirection 3xx. */
106 #define HTTP_STATUS_MULTIPLE_CHOICES 300
107 #define HTTP_STATUS_MOVED_PERMANENTLY 301
108 #define HTTP_STATUS_MOVED_TEMPORARILY 302
109 #define HTTP_STATUS_NOT_MODIFIED 304
110 #define HTTP_STATUS_TEMPORARY_REDIRECT 307
112 /* Client error 4xx. */
113 #define HTTP_STATUS_BAD_REQUEST 400
114 #define HTTP_STATUS_UNAUTHORIZED 401
115 #define HTTP_STATUS_FORBIDDEN 403
116 #define HTTP_STATUS_NOT_FOUND 404
118 /* Server errors 5xx. */
119 #define HTTP_STATUS_INTERNAL 500
120 #define HTTP_STATUS_NOT_IMPLEMENTED 501
121 #define HTTP_STATUS_BAD_GATEWAY 502
122 #define HTTP_STATUS_UNAVAILABLE 503
125 rel_none, rel_name, rel_value, rel_both
132 struct request_header {
134 enum rp release_policy;
136 int hcount, hcapacity;
139 /* Create a new, empty request. At least request_set_method must be
140 called before the request can be used. */
142 static struct request *
145 struct request *req = xnew0 (struct request);
147 req->headers = xnew_array (struct request_header, req->hcapacity);
151 /* Set the request's method and its arguments. METH should be a
152 literal string (or it should outlive the request) because it will
153 not be freed. ARG will be freed by request_free. */
156 request_set_method (struct request *req, const char *meth, char *arg)
162 /* Return the method string passed with the last call to
163 request_set_method. */
166 request_method (const struct request *req)
171 /* Free one header according to the release policy specified with
172 request_set_header. */
175 release_header (struct request_header *hdr)
177 switch (hdr->release_policy)
194 /* Set the request header named NAME to VALUE. Specifically, this means that
195 a "NAME: VALUE\r\n" header line will be used in the request. If a
196 header with the same name previously existed in the request, its
197 value will be replaced by this one.
199 RELEASE_POLICY determines whether NAME and VALUE should be released
200 (freed) with request_free. Allowed values are:
202 - rel_none - don't free NAME or VALUE
203 - rel_name - free NAME when done
204 - rel_value - free VALUE when done
205 - rel_both - free both NAME and VALUE when done
207 Setting release policy is useful when arguments come from different
208 sources. For example:
210 // Don't free literal strings!
211 request_set_header (req, "Pragma", "no-cache", rel_none);
213 // Don't free a global variable, we'll need it later.
214 request_set_header (req, "Referer", opt.referer, rel_none);
216 // Value freshly allocated, free it when done.
217 request_set_header (req, "Range", aprintf ("bytes=%ld-", hs->restval),
222 request_set_header (struct request *req, char *name, char *value,
223 enum rp release_policy)
225 struct request_header *hdr;
229 for (i = 0; i < req->hcount; i++)
231 hdr = &req->headers[i];
232 if (0 == strcasecmp (name, hdr->name))
234 /* Replace existing header. */
235 release_header (hdr);
238 hdr->release_policy = release_policy;
243 /* Install new header. */
/* Grow the header array only when it is actually full.  The count must
   be compared against the capacity: comparing hcount with itself is
   always true, which reallocates and doubles the array on every newly
   installed header. */
245 if (req->hcount >= req->hcapacity)
247 req->hcapacity <<= 1;
248 req->headers = xrealloc (req->headers,
249 req->hcapacity * sizeof (struct request_header));
251 hdr = &req->headers[req->hcount++];
254 hdr->release_policy = release_policy;
257 /* Like request_set_header, but sets the whole header line, as
258 provided by the user using the `--header' option. For example,
259 request_set_user_header (req, "Foo: bar") works just like
260 request_set_header (req, "Foo", "bar"). */
263 request_set_user_header (struct request *req, const char *header)
266 const char *p = strchr (header, ':');
269 BOUNDED_TO_ALLOCA (header, p, name);
273 request_set_header (req, xstrdup (name), (char *) p, rel_name);
276 #define APPEND(p, str) do { \
277 int A_len = strlen (str); \
278 memcpy (p, str, A_len); \
282 /* Construct the request and write it to FD using fd_write. */
285 request_send (const struct request *req, int fd)
287 char *request_string, *p;
288 int i, size, write_error;
290 /* Count the request size. */
293 /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
294 size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
296 for (i = 0; i < req->hcount; i++)
298 struct request_header *hdr = &req->headers[i];
299 /* NAME ": " VALUE "\r\n" */
300 size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
306 p = request_string = alloca_array (char, size);
308 /* Generate the request. */
310 APPEND (p, req->method); *p++ = ' ';
311 APPEND (p, req->arg); *p++ = ' ';
312 memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
314 for (i = 0; i < req->hcount; i++)
316 struct request_header *hdr = &req->headers[i];
317 APPEND (p, hdr->name);
318 *p++ = ':', *p++ = ' ';
319 APPEND (p, hdr->value);
320 *p++ = '\r', *p++ = '\n';
323 *p++ = '\r', *p++ = '\n', *p++ = '\0';
324 assert (p - request_string == size);
328 DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
330 /* Send the request to the server. */
332 write_error = fd_write (fd, request_string, size - 1, -1);
334 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
339 /* Release the resources used by REQ. */
342 request_free (struct request *req)
345 xfree_null (req->arg);
346 for (i = 0; i < req->hcount; i++)
347 release_header (&req->headers[i]);
348 xfree_null (req->headers);
352 /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
353 PROMISED_SIZE bytes are sent over the wire -- if the file is
354 longer, read only that much; if the file is shorter, report an error. */
357 post_file (int sock, const char *file_name, long promised_size)
359 static char chunk[8192];
364 DEBUGP (("[writing POST file %s ... ", file_name));
366 fp = fopen (file_name, "rb");
369 while (!feof (fp) && written < promised_size)
372 int length = fread (chunk, 1, sizeof (chunk), fp);
375 towrite = MIN (promised_size - written, length);
376 write_error = fd_write (sock, chunk, towrite, -1);
386 /* If we've written less than was promised, report a (probably
387 nonsensical) error rather than break the promise. */
388 if (written < promised_size)
394 assert (written == promised_size);
395 DEBUGP (("done]\n"));
400 head_terminator (const char *hunk, int oldlen, int peeklen)
402 const char *start, *end;
404 /* If at first peek, verify whether HUNK starts with "HTTP". If
405 not, this is an HTTP/0.9 response and we must bail out without
407 if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
413 start = hunk + oldlen - 4;
414 end = hunk + oldlen + peeklen;
416 for (; start < end - 1; start++)
423 if (start[1] == '\n')
429 /* Read the HTTP response head from FD and return it. The error
430 conditions are the same as with fd_read_hunk.
432 To support HTTP/0.9 responses, this function tries to make sure
433 that the data begins with "HTTP". If this is not the case, no data
434 is read and an empty response head is returned, so that the remaining
435 data can be treated as body. */
438 fd_read_http_head (int fd)
440 return fd_read_hunk (fd, head_terminator, 512);
444 /* The response data. */
447 /* The array of pointers that indicate where each header starts.
448 For example, given this HTTP response:
455 The headers are located like this:
457 "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
459 headers[0] headers[1] headers[2] headers[3]
461 I.e. headers[0] points to the beginning of the response,
462 headers[1] points to the end of the first header and the
463 beginning of the second one, etc. */
465 const char **headers;
468 /* Create a new response object from the text of the HTTP response,
469 available in HEAD. That text is automatically split into
470 constituent header lines for fast retrieval using
471 response_header_*. */
473 static struct response *
474 response_new (const char *head)
479 struct response *resp = xnew0 (struct response);
484 /* Empty head means that we're dealing with a headerless
485 (HTTP/0.9) response. In that case, don't set HEADERS at
490 /* Split HEAD into header lines, so that response_header_* functions
491 don't need to do this over and over again. */
497 DO_REALLOC (resp->headers, size, count + 1, const char *);
498 resp->headers[count++] = hdr;
500 /* Break upon encountering an empty line. */
501 if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
504 /* Find the end of HDR, including continuations. */
507 const char *end = strchr (hdr, '\n');
513 while (*hdr == ' ' || *hdr == '\t');
515 DO_REALLOC (resp->headers, size, count + 1, const char *);
516 resp->headers[count++] = NULL;
521 /* Locate the header named NAME in the response data. If found, set
522 *BEGPTR to its starting, and *ENDPTR to its ending position, and
523 return 1. Otherwise return 0.
525 This function is used as a building block for response_header_copy
526 and response_header_strdup. */
529 response_header_bounds (const struct response *resp, const char *name,
530 const char **begptr, const char **endptr)
533 const char **headers = resp->headers;
536 if (!headers || !headers[1])
539 name_len = strlen (name);
541 for (i = 1; headers[i + 1]; i++)
543 const char *b = headers[i];
544 const char *e = headers[i + 1];
546 && b[name_len] == ':'
547 && 0 == strncasecmp (b, name, name_len))
550 while (b < e && ISSPACE (*b))
552 while (b < e && ISSPACE (e[-1]))
562 /* Copy the response header named NAME to buffer BUF, no longer than
563 BUFSIZE (BUFSIZE includes the terminating 0). If the header
564 exists, 1 is returned, otherwise 0. If there should be no limit on
565 the size of the header, use response_header_strdup instead.
567 If BUFSIZE is 0, no data is copied, but the boolean indication of
568 whether the header is present is still returned. */
571 response_header_copy (const struct response *resp, const char *name,
572 char *buf, int bufsize)
575 if (!response_header_bounds (resp, name, &b, &e))
/* BUFSIZE is documented to include the terminating 0, so at most
   BUFSIZE - 1 bytes of the header value may be copied; copying BUFSIZE
   bytes would leave no room for the terminator.  NOTE(review): this
   assumes the terminator is stored at buf[len] right after the copy --
   confirm against the statement that follows. */
579 int len = MIN (e - b, bufsize - 1);
580 strncpy (buf, b, len);
586 /* Return the value of header named NAME in RESP, allocated with
587 malloc. If such a header does not exist in RESP, return NULL. */
590 response_header_strdup (const struct response *resp, const char *name)
593 if (!response_header_bounds (resp, name, &b, &e))
595 return strdupdelim (b, e);
598 /* Parse the HTTP status line, which is of format:
600 HTTP-Version SP Status-Code SP Reason-Phrase
602 The function returns the status-code, or -1 if the status line
603 appears malformed. The pointer to "reason-phrase" message is
604 returned in *MESSAGE. */
607 response_status (const struct response *resp, char **message)
614 /* For a HTTP/0.9 response, assume status 200. */
616 *message = xstrdup (_("No headers, assuming HTTP/0.9"));
620 p = resp->headers[0];
621 end = resp->headers[1];
627 if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
631 /* Match the HTTP version. This is optional because Gnutella
632 servers have been reported to not specify HTTP version. */
633 if (p < end && *p == '/')
636 while (p < end && ISDIGIT (*p))
638 if (p < end && *p == '.')
640 while (p < end && ISDIGIT (*p))
644 while (p < end && ISSPACE (*p))
646 if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
649 status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
654 while (p < end && ISSPACE (*p))
656 while (p < end && ISSPACE (end[-1]))
658 *message = strdupdelim (p, end);
664 /* Release the resources used by RESP. */
667 response_free (struct response *resp)
669 xfree_null (resp->headers);
673 /* Print [b, e) to the log, omitting the trailing CRLF. */
676 print_server_response_1 (const char *prefix, const char *b, const char *e)
679 if (b < e && e[-1] == '\n')
681 if (b < e && e[-1] == '\r')
683 BOUNDED_TO_ALLOCA (b, e, ln);
684 logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln);
687 /* Print the server response, line by line, omitting the trailing CR
688 characters, prefixed with PREFIX. */
691 print_server_response (const struct response *resp, const char *prefix)
696 for (i = 0; resp->headers[i + 1]; i++)
697 print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]);
700 /* Parse the `Content-Range' header and extract the information it
701 contains. Returns 1 if successful, -1 otherwise. */
703 parse_content_range (const char *hdr, long *first_byte_ptr,
704 long *last_byte_ptr, long *entity_length_ptr)
708 /* Ancient versions of Netscape proxy server, presumably predating
709 rfc2068, sent out `Content-Range' without the "bytes"
711 if (!strncasecmp (hdr, "bytes", 5))
714 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
718 while (ISSPACE (*hdr))
725 for (num = 0; ISDIGIT (*hdr); hdr++)
726 num = 10 * num + (*hdr - '0');
727 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
729 *first_byte_ptr = num;
731 for (num = 0; ISDIGIT (*hdr); hdr++)
732 num = 10 * num + (*hdr - '0');
733 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
735 *last_byte_ptr = num;
737 for (num = 0; ISDIGIT (*hdr); hdr++)
738 num = 10 * num + (*hdr - '0');
739 *entity_length_ptr = num;
743 /* Read the body of the response, but don't store it anywhere and don't
744 display a progress gauge. This is useful for reading the error
745 responses whose bodies don't need to be displayed or logged, but
746 which need to be read anyway. */
749 skip_short_body (int fd, long contlen)
751 /* Skipping the body doesn't make sense if the content length is
752 unknown because, in that case, persistent connections cannot be
753 used. (#### This is not the case with HTTP/1.1 where they can
754 still be used with the magic of the "chunked" transfer!) */
757 DEBUGP (("Skipping %ld bytes of body data... ", contlen));
762 int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1);
767 DEBUGP (("done.\n"));
770 /* Persistent connections. Currently, we cache the most recently used
771 connection as persistent, provided that the HTTP server agrees to
772 make it such. The persistence data is stored in the variables
773 below. Ideally, it should be possible to cache an arbitrary fixed
774 number of these connections. */
776 /* Whether a persistent connection is active. */
777 static int pconn_active;
780 /* The socket of the connection. */
783 /* Host and port of the currently active persistent connection. */
787 /* Whether an SSL handshake has occurred on this connection. */
791 /* Mark the persistent connection as invalid and free the resources it
792 uses. This is used by the CLOSE_* macros after they forcefully
793 close a registered persistent connection. */
796 invalidate_persistent (void)
798 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
800 fd_close (pconn.socket);
805 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
806 persistent. This will enable someone to use the same connection
807 later. In the context of HTTP, this must be called only AFTER the
808 response has been received and the server has promised that the
809 connection will remain alive.
811 If a previous connection was persistent, it is closed. */
814 register_persistent (const char *host, int port, int fd, int ssl)
818 if (pconn.socket == fd)
820 /* The connection FD is already registered. */
825 /* The old persistent connection is still active; close it
826 first. This situation arises whenever a persistent
827 connection exists, but we then connect to a different
828 host, and try to register a persistent connection to that
830 invalidate_persistent ();
836 pconn.host = xstrdup (host);
840 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
843 /* Return non-zero if a persistent connection is available for
844 connecting to HOST:PORT. */
847 persistent_available_p (const char *host, int port, int ssl,
848 int *host_lookup_failed)
850 /* First, check whether a persistent connection is active at all. */
854 /* If we want SSL and the last connection wasn't or vice versa,
855 don't use it. Checking for host and port is not enough because
856 HTTP and HTTPS can apparently coexist on the same port. */
857 if (ssl != pconn.ssl)
860 /* If we're not connecting to the same port, we're not interested. */
861 if (port != pconn.port)
864 /* If the host is the same, we're in business. If not, there is
865 still hope -- read below. */
866 if (0 != strcasecmp (host, pconn.host))
868 /* If pconn.socket is already talking to HOST, we needn't
869 reconnect. This happens often when both sites are virtual
870 hosts distinguished only by name and served by the same
871 network interface, and hence the same web server (possibly
872 set up by the ISP and serving many different web sites).
873 This admittedly non-standard optimization does not contradict
874 HTTP and works well with popular server software. */
878 struct address_list *al;
881 /* Don't try to talk to two different SSL sites over the same
882 secure connection! (Besides, it's not clear if name-based
883 virtual hosting is even possible with SSL.) */
886 /* If pconn.socket's peer is one of the IP addresses HOST
887 resolves to, pconn.socket is for all intents and purposes
888 already talking to HOST. */
890 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
892 /* Can't get the peer's address -- something must be very
893 wrong with the connection. */
894 invalidate_persistent ();
897 al = lookup_host (host, 0);
900 *host_lookup_failed = 1;
904 found = address_list_contains (al, &ip);
905 address_list_release (al);
910 /* The persistent connection's peer address was found among the
911 addresses HOST resolved to; therefore, pconn.sock is in fact
912 already talking to HOST -- no need to reconnect. */
915 /* Finally, check whether the connection is still open. This is
916 important because most servers implement a liberal (short) timeout
917 on persistent connections. Wget can of course always reconnect
918 if the connection doesn't work out, but it's nicer to know in
919 advance. This test is a logical followup of the first test, but
920 is "expensive" and therefore placed at the end of the list. */
922 if (!test_socket_open (pconn.socket))
924 /* Oops, the socket is no longer open. Now that we know that,
925 let's invalidate the persistent connection before returning
927 invalidate_persistent ();
934 /* The idea behind these two CLOSE macros is to distinguish between
935 two cases: one when the job we've been doing is finished, and we
936 want to close the connection and leave, and two when something is
937 seriously wrong and we're closing the connection as part of
940 In case of keep_alive, CLOSE_FINISH should leave the connection
941 open, while CLOSE_INVALIDATE should still close it.
943 Note that the semantics of the flag `keep_alive' is "this
944 connection *will* be reused (the server has promised not to close
945 the connection once we're done)", while the semantics of
946 `pconn_active && (fd) == pconn.socket' is "we're *now* using an
947 active, registered connection". */
949 #define CLOSE_FINISH(fd) do { \
952 if (pconn_active && (fd) == pconn.socket) \
953 invalidate_persistent (); \
962 #define CLOSE_INVALIDATE(fd) do { \
963 if (pconn_active && (fd) == pconn.socket) \
964 invalidate_persistent (); \
972 long len; /* received length */
973 long contlen; /* expected length */
974 long restval; /* the restart value */
975 int res; /* the result of last read */
976 char *newloc; /* new location (redirection) */
977 char *remote_time; /* remote time-stamp string */
978 char *error; /* textual HTTP error */
979 int statcode; /* status code */
980 double dltime; /* time of the download in msecs */
981 int no_truncate; /* whether truncating the file is
983 const char *referer; /* value of the referer header. */
984 char **local_file; /* local file. */
988 free_hstat (struct http_stat *hs)
990 xfree_null (hs->newloc);
991 xfree_null (hs->remote_time);
992 xfree_null (hs->error);
994 /* Guard against being called twice. */
996 hs->remote_time = NULL;
1000 static char *create_authorization_line PARAMS ((const char *, const char *,
1001 const char *, const char *,
1003 static char *basic_authentication_encode PARAMS ((const char *, const char *));
1004 static int known_authentication_scheme_p PARAMS ((const char *));
1006 time_t http_atotm PARAMS ((const char *));
/* Non-zero if LINE starts with STRING_CONSTANT, compared without regard
   to case, and the match is followed either by a character for which
   ISSPACE is true or by the end of the string.  STRING_CONSTANT must be
   a string literal (or a char array), because its length is taken with
   sizeof rather than strlen.  Both arguments are evaluated more than
   once, so they must not have side effects. */
1008 #define BEGINS_WITH(line, string_constant) \
1009 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
1010 && (ISSPACE (line[sizeof (string_constant) - 1]) \
1011 || !line[sizeof (string_constant) - 1]))
1013 /* Retrieve a document through HTTP protocol. It recognizes status
1014 code, and correctly handles redirections. It closes the network
1015 socket. If it receives an error from the functions below it, it
1016 will print it if there is enough information to do so (almost
1017 always), returning the error to the caller (i.e. http_loop).
1019 Various HTTP parameters are stored to hs.
1021 If PROXY is non-NULL, the connection will be made to the proxy
1022 server, and u->url will be requested. */
1024 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
1026 struct request *req;
1029 char *user, *passwd;
1033 long contlen, contrange;
1039 /* Whether authorization has been already tried. */
1040 int auth_tried_already = 0;
1042 /* Whether our connection to the remote host is through SSL. */
1046 struct response *resp;
1050 /* Whether this connection will be kept alive after the HTTP request
1054 /* Whether keep-alive should be inhibited. */
1055 int inhibit_keep_alive = !opt.http_keep_alive;
1057 /* Headers sent when using POST. */
1058 long post_data_size = 0;
1060 int host_lookup_failed = 0;
1063 if (u->scheme == SCHEME_HTTPS)
1065 /* Initialize the SSL context. After this has once been done,
1066 it becomes a no-op. */
1067 switch (ssl_init ())
1069 case SSLERRCTXCREATE:
1071 logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
1072 return SSLERRCTXCREATE;
1073 case SSLERRCERTFILE:
1074 /* try without certfile */
1075 logprintf (LOG_NOTQUIET,
1076 _("Failed to load certificates from %s\n"),
1078 logprintf (LOG_NOTQUIET,
1079 _("Trying without the specified certificate\n"));
1082 logprintf (LOG_NOTQUIET,
1083 _("Failed to get certificate key from %s\n"),
1085 logprintf (LOG_NOTQUIET,
1086 _("Trying without the specified certificate\n"));
1092 #endif /* HAVE_SSL */
1094 if (!(*dt & HEAD_ONLY))
1095 /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
1096 know the local filename so we can save to it. */
1097 assert (*hs->local_file != NULL);
1099 auth_tried_already = 0;
1101 /* Initialize certain elements of struct http_stat. */
1106 hs->remote_time = NULL;
1114 char *proxy_user, *proxy_passwd;
1115 /* For normal username and password, URL components override
1116 command-line/wgetrc parameters. With proxy
1117 authentication, it's the reverse, because proxy URLs are
1118 normally the "permanent" ones, so command-line args
1119 should take precedence. */
1120 if (opt.proxy_user && opt.proxy_passwd)
1122 proxy_user = opt.proxy_user;
1123 proxy_passwd = opt.proxy_passwd;
1127 proxy_user = proxy->user;
1128 proxy_passwd = proxy->passwd;
1130 /* #### This does not appear right. Can't the proxy request,
1131 say, `Digest' authentication? */
1132 if (proxy_user && proxy_passwd)
1133 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1135 /* If we're using a proxy, we will be connecting to the proxy
1140 /* Prepare the request to send. */
1142 req = request_new ();
1144 const char *meth = "GET";
1145 if (*dt & HEAD_ONLY)
1147 else if (opt.post_file_name || opt.post_data)
1149 /* Use the full path, i.e. one that includes the leading slash and
1150 the query string. E.g. if u->path is "foo/bar" and u->query is
1151 "param=value", full_path will be "/foo/bar?param=value". */
1152 request_set_method (req, meth,
1153 proxy ? xstrdup (u->url) : url_full_path (u));
1156 request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1157 if (*dt & SEND_NOCACHE)
1158 request_set_header (req, "Pragma", "no-cache", rel_none);
1160 request_set_header (req, "Range",
1161 aprintf ("bytes=%ld-", hs->restval), rel_value);
1163 request_set_header (req, "User-Agent", opt.useragent, rel_none);
1165 request_set_header (req, "User-Agent",
1166 aprintf ("Wget/%s", version_string), rel_value);
1167 request_set_header (req, "Accept", "*/*", rel_none);
1169 /* Find the username and password for authentication. */
1172 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1173 user = user ? user : opt.http_user;
1174 passwd = passwd ? passwd : opt.http_passwd;
1178 /* We have the username and the password, but haven't tried
1179 any authorization yet. Let's see if the "Basic" method
1180 works. If not, we'll come back here and construct a
1181 proper authorization method with the right challenges.
1183 If we didn't employ this kind of logic, every URL that
1184 requires authorization would have to be processed twice,
1185 which is very suboptimal and generates a bunch of false
1186 "unauthorized" errors in the server log.
1188 #### But this logic also has a serious problem when used
1189 with stronger authentications: we *first* transmit the
1190 username and the password in clear text, and *then* attempt a
1191 stronger authentication scheme. That cannot be right! We
1192 are only fortunate that almost everyone still uses the
1193 `Basic' scheme anyway.
1195 There should be an option to prevent this from happening, for
1196 those who use strong authentication schemes and value their
1198 request_set_header (req, "Authorization",
1199 basic_authentication_encode (user, passwd),
1204 /* Whether we need to print the host header with square brackets around
1205 host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
1206 usual "Host: symbolic-name:1234". */
1207 int squares = strchr (u->host, ':') != NULL;
1208 if (u->port == scheme_default_port (u->scheme))
1209 request_set_header (req, "Host",
1210 aprintf (squares ? "[%s]" : "%s", u->host),
1213 request_set_header (req, "Host",
1214 aprintf (squares ? "[%s]:%d" : "%s:%d",
1219 if (!inhibit_keep_alive)
1220 request_set_header (req, "Connection", "Keep-Alive", rel_none);
1223 request_set_header (req, "Cookie",
1224 cookie_header (wget_cookie_jar,
1225 u->host, u->port, u->path,
1227 u->scheme == SCHEME_HTTPS
1234 if (opt.post_data || opt.post_file_name)
1236 request_set_header (req, "Content-Type",
1237 "application/x-www-form-urlencoded", rel_none);
1239 post_data_size = strlen (opt.post_data);
1242 post_data_size = file_size (opt.post_file_name);
1243 if (post_data_size == -1)
1245 logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
1246 opt.post_file_name);
/* The header value is only the decimal byte count.  request_send
   writes each header as NAME ": " VALUE, so repeating the header name
   inside the value would put "Content-Length: Content-Length: N" on
   the wire. */
1250 request_set_header (req, "Content-Length",
1251 aprintf ("%ld", post_data_size),
1255 /* Add the user headers. */
1256 if (opt.user_headers)
1259 for (i = 0; opt.user_headers[i]; i++)
1260 request_set_user_header (req, opt.user_headers[i]);
1264 /* We need to come back here when the initial attempt to retrieve
1265 without authorization header fails. (Expected to happen at least
1266 for the Digest authorization scheme.) */
1270 /* Establish the connection. */
1272 if (!inhibit_keep_alive)
1274 /* Look for a persistent connection to target host, unless a
1275 proxy is used. The exception is when SSL is in use, in which
1276 case the proxy is nothing but a passthrough to the target
1277 host, registered as a connection to the latter. */
1278 struct url *relevant = conn;
1280 if (u->scheme == SCHEME_HTTPS)
1284 if (persistent_available_p (relevant->host, relevant->port,
1286 relevant->scheme == SCHEME_HTTPS,
1290 &host_lookup_failed))
1292 sock = pconn.socket;
1293 using_ssl = pconn.ssl;
1294 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1295 pconn.host, pconn.port);
1296 DEBUGP (("Reusing fd %d.\n", sock));
1302 /* In its current implementation, persistent_available_p will
1303 look up conn->host in some cases. If that lookup failed, we
1304 don't need to bother with connect_to_host. */
1305 if (host_lookup_failed)
1308 sock = connect_to_host (conn->host, conn->port);
1312 return (retryable_socket_connect_error (errno)
1313 ? CONERROR : CONIMPOSSIBLE);
1316 if (proxy && u->scheme == SCHEME_HTTPS)
1318 /* When requesting SSL URLs through proxies, use the
1319 CONNECT method to request passthrough. */
1320 struct request *connreq = request_new ();
1321 request_set_method (connreq, "CONNECT",
1322 aprintf ("%s:%d", u->host, u->port));
1325 request_set_header (connreq, "Proxy-Authorization",
1326 proxyauth, rel_value);
1327 /* Now that PROXYAUTH is part of the CONNECT request,
1328 zero it out so we don't send proxy authorization with
1329 the regular request below. */
1333 write_error = request_send (connreq, sock);
1334 request_free (connreq);
1335 if (write_error < 0)
1337 logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"),
1339 CLOSE_INVALIDATE (sock);
1343 head = fd_read_http_head (sock);
1346 logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1348 CLOSE_INVALIDATE (sock);
1357 DEBUGP (("proxy responded with: [%s]\n", head));
1359 resp = response_new (head);
1360 statcode = response_status (resp, &message);
1361 response_free (resp);
1362 if (statcode != 200)
1365 logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1366 message ? message : "?");
1367 xfree_null (message);
1372 /* SOCK is now *really* connected to u->host, so update CONN
1373 to reflect this. That way register_persistent will
1374 register SOCK as being connected to u->host:u->port. */
1378 if (conn->scheme == SCHEME_HTTPS)
1380 if (!ssl_connect (sock))
1387 #endif /* HAVE_SSL */
1390 /* Send the request to server. */
1391 write_error = request_send (req, sock);
1393 if (write_error >= 0)
1397 DEBUGP (("[POST data: %s]\n", opt.post_data));
1398 write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1400 else if (opt.post_file_name && post_data_size != 0)
1401 write_error = post_file (sock, opt.post_file_name, post_data_size);
1404 if (write_error < 0)
1406 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1408 CLOSE_INVALIDATE (sock);
1412 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1413 proxy ? "Proxy" : "HTTP");
1420 head = fd_read_http_head (sock);
1425 logputs (LOG_NOTQUIET, _("No data received.\n"));
1426 CLOSE_INVALIDATE (sock);
1432 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1434 CLOSE_INVALIDATE (sock);
1439 DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1441 resp = response_new (head);
1443 /* Check for status line. */
1445 statcode = response_status (resp, &message);
1446 if (!opt.server_response)
1447 logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
1450 logprintf (LOG_VERBOSE, "\n");
1451 print_server_response (resp, " ");
1454 if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1455 contlen = strtol (hdrval, NULL, 10);
1457 /* Check for keep-alive related responses. */
1458 if (!inhibit_keep_alive && contlen != -1)
1460 if (response_header_copy (resp, "Keep-Alive", NULL, 0))
1462 else if (response_header_copy (resp, "Connection", hdrval,
1465 if (0 == strcasecmp (hdrval, "Keep-Alive"))
1470 /* The server has promised that it will not close the connection
1471 when we're done. This means that we can register it. */
1472 register_persistent (conn->host, conn->port, sock, using_ssl);
1474 if (statcode == HTTP_STATUS_UNAUTHORIZED)
1476 /* Authorization is required. */
1477 skip_short_body (sock, contlen);
1478 CLOSE_FINISH (sock);
1479 if (auth_tried_already || !(user && passwd))
1481 /* If we have tried it already, then there is not point
1483 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1487 char *www_authenticate = response_header_strdup (resp,
1488 "WWW-Authenticate");
1489 /* If the authentication scheme is unknown or if it's the
1490 "Basic" authentication (which we try by default), there's
1491 no sense in retrying. */
1492 if (!www_authenticate
1493 || !known_authentication_scheme_p (www_authenticate)
1494 || BEGINS_WITH (www_authenticate, "Basic"))
1496 xfree_null (www_authenticate);
1497 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1502 auth_tried_already = 1;
1503 pth = url_full_path (u);
1504 request_set_header (req, "Authorization",
1505 create_authorization_line (www_authenticate,
1507 request_method (req),
1511 xfree (www_authenticate);
1512 goto retry_with_auth;
1520 hs->statcode = statcode;
1522 hs->error = xstrdup (_("Malformed status line"));
1524 hs->error = xstrdup (_("(no description)"));
1526 hs->error = xstrdup (message);
1528 type = response_header_strdup (resp, "Content-Type");
1531 char *tmp = strchr (type, ';');
1534 while (tmp > type && ISSPACE (tmp[-1]))
1539 hs->newloc = response_header_strdup (resp, "Location");
1540 hs->remote_time = response_header_strdup (resp, "Last-Modified");
1542 char *set_cookie = response_header_strdup (resp, "Set-Cookie");
1545 /* The jar should have been created by now. */
1546 assert (wget_cookie_jar != NULL);
1547 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
1552 if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
1554 long first_byte_pos, last_byte_pos, entity_length;
1555 if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
1557 contrange = first_byte_pos;
1559 response_free (resp);
1561 /* 20x responses are counted among successful by default. */
1562 if (H_20X (statcode))
1565 /* Return if redirected. */
1566 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1568 /* RFC2068 says that in case of the 300 (multiple choices)
1569 response, the server can output a preferred URL through
1570 `Location' header; otherwise, the request should be treated
1571 like GET. So, if the location is set, it will be a
1572 redirection; otherwise, just proceed normally. */
1573 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1577 logprintf (LOG_VERBOSE,
1578 _("Location: %s%s\n"),
1579 hs->newloc ? hs->newloc : _("unspecified"),
1580 hs->newloc ? _(" [following]") : "");
1582 skip_short_body (sock, contlen);
1583 CLOSE_FINISH (sock);
1589 /* If content-type is not given, assume text/html. This is because
1590 of the multitude of broken CGI's that "forget" to generate the
1593 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1594 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1599 if (opt.html_extension && (*dt & TEXTHTML))
1600 /* -E / --html-extension / html_extension = on was specified, and this is a
1601 text/html file. If some case-insensitive variation on ".htm[l]" isn't
1602 already the file's suffix, tack on ".html". */
1604 char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
1606 if (last_period_in_local_filename == NULL
1607 || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1608 || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1610 size_t local_filename_len = strlen(*hs->local_file);
1612 *hs->local_file = xrealloc(*hs->local_file,
1613 local_filename_len + sizeof(".html"));
1614 strcpy(*hs->local_file + local_filename_len, ".html");
1616 *dt |= ADDED_HTML_EXTENSION;
1620 if (contrange == 0 && hs->restval > 0)
1622 /* The download starts from the beginning, presumably because
1623 the server did not honor our `Range' request. Normally we'd
1624 just reset hs->restval and start the download from
1627 /* However, if `-c' is used, we need to be a bit more careful:
1629 1. If `-c' is specified and the file already existed when
1630 Wget was started, it would be a bad idea to start downloading
1631 it from scratch, effectively truncating the file.
1633 2. If `-c' is used on a file that is already fully
1634 downloaded, we're requesting bytes after the end of file,
1635 which can result in the server not honoring `Range'. If this
1636 is the case, `Content-Length' will be equal to the length of
1638 if (opt.always_rest)
1640 /* Check for condition #2. */
1641 if (contlen != -1 /* we got content-length. */
1642 && hs->restval >= contlen /* file fully downloaded
1646 logputs (LOG_VERBOSE, _("\
1647 \n The file is already fully retrieved; nothing to do.\n\n"));
1648 /* In case the caller inspects. */
1651 /* Mark as successfully retrieved. */
1654 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1655 might be more bytes in the body. */
1656 return RETRUNNEEDED;
1659 /* Check for condition #1. */
1660 if (hs->no_truncate)
1662 logprintf (LOG_NOTQUIET,
1665 Continued download failed on this file, which conflicts with `-c'.\n\
1666 Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
1668 CLOSE_INVALIDATE (sock); /* see above */
1669 return CONTNOTSUPPORTED;
1677 else if (contrange != hs->restval ||
1678 (H_PARTIAL (statcode) && contrange == -1))
1680 /* This means the whole request was somehow misunderstood by the
1681 server. Bail out. */
1683 CLOSE_INVALIDATE (sock);
1686 hs->contlen = contlen + contrange;
1692 /* No need to print this output if the body won't be
1693 downloaded at all, or if the original server response is
1695 logputs (LOG_VERBOSE, _("Length: "));
1698 logputs (LOG_VERBOSE, legible (contlen + contrange));
1700 logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen));
1703 logputs (LOG_VERBOSE,
1704 opt.ignore_length ? _("ignored") : _("unspecified"));
1706 logprintf (LOG_VERBOSE, " [%s]\n", type);
1708 logputs (LOG_VERBOSE, "\n");
1712 type = NULL; /* We don't need it any more. */
1714 /* Return if we have no intention of further downloading. */
1715 if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1717 /* In case the caller cares to look... */
1721 /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
1722 servers not to send body in response to a HEAD request. If
1723 you encounter such a server (more likely a broken CGI), use
1724 `--no-http-keep-alive'. */
1725 CLOSE_FINISH (sock);
1726 return RETRFINISHED;
1729 /* Open the local file. */
1732 mkalldirs (*hs->local_file);
1734 rotate_backups (*hs->local_file);
1735 fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
1738 logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1739 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1740 might be more bytes in the body. */
1746 extern int global_download_count;
1748 /* To ensure that repeated "from scratch" downloads work for -O
1749 files, we rewind the file pointer, unless restval is
1750 non-zero. (This works only when -O is used on regular files,
1751 but it's still a valuable feature.)
1753 However, this loses when more than one URL is specified on
1754 the command line the second rewinds eradicates the contents
1755 of the first download. Thus we disable the above trick for
1756 all the downloads except the very first one.
1758 #### A possible solution to this would be to remember the
1759 file position in the output document and to seek to that
1760 position, instead of rewinding.
1762 We don't truncate stdout, since that breaks
1763 "wget -O - [...] >> foo".
1765 if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
1767 /* This will silently fail for streams that don't correspond
1768 to regular files, but that's OK. */
1770 /* ftruncate is needed because opt.dfp is opened in append
1771 mode if opt.always_rest is set. */
1772 ftruncate (fileno (fp), 0);
1777 /* #### This confuses the code that checks for file size. There
1778 should be some overhead information. */
1779 if (opt.save_headers)
1780 fwrite (head, 1, strlen (head), fp);
1782 /* Download the request body. */
1783 hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, keep_alive,
1784 hs->restval, &hs->len, &hs->dltime);
1785 hs->len += contrange;
1788 CLOSE_FINISH (sock);
1790 CLOSE_INVALIDATE (sock);
1793 /* Close or flush the file. We have to be careful to check for
1794 error here. Checking the result of fwrite() is not enough --
1795 errors could go unnoticed! */
1798 flush_res = fclose (fp);
1800 flush_res = fflush (fp);
1801 if (flush_res == EOF)
1806 return RETRFINISHED;
1809 /* The genuine HTTP loop! This is the part where the retrieval is
1810 retried, and retried, and retried, and... */
1812 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1813 int *dt, struct url *proxy)
1816 int use_ts, got_head = 0; /* time-stamping info */
1817 char *filename_plus_orig_suffix;
1818 char *local_filename = NULL;
1819 char *tms, *locf, *tmrate;
1821 time_t tml = -1, tmr = -1; /* local and remote time-stamps */
1822 long local_size = 0; /* the size of the local file */
1823 size_t filename_len;
1824 struct http_stat hstat; /* HTTP status */
1828 /* This used to be done in main(), but it's a better idea to do it
1829 here so that we don't go through the hoops if we're just using
1833 if (!wget_cookie_jar)
1834 wget_cookie_jar = cookie_jar_new ();
1835 if (opt.cookies_input && !cookies_loaded_p)
1837 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1838 cookies_loaded_p = 1;
1844 /* Warn on (likely bogus) wildcard usage in HTTP. Don't use
1845 has_wildcards_p because it would also warn on `?', and we know that
1846 shows up in CGI paths a *lot*. */
1847 if (strchr (u->url, '*'))
1848 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1850 /* Determine the local filename. */
1851 if (local_file && *local_file)
1852 hstat.local_file = local_file;
1853 else if (local_file)
1855 *local_file = url_file_name (u);
1856 hstat.local_file = local_file;
1860 dummy = url_file_name (u);
1861 hstat.local_file = &dummy;
1864 if (!opt.output_document)
1865 locf = *hstat.local_file;
1867 locf = opt.output_document;
1869 hstat.referer = referer;
1871 filename_len = strlen (*hstat.local_file);
1872 filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1874 if (opt.noclobber && file_exists_p (*hstat.local_file))
1876 /* If opt.noclobber is turned on and file already exists, do not
1877 retrieve the file */
1878 logprintf (LOG_VERBOSE, _("\
1879 File `%s' already there, will not retrieve.\n"), *hstat.local_file);
1880 /* If the file is there, we suppose it's retrieved OK. */
1883 /* #### Bogusness alert. */
1884 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1885 if (has_html_suffix_p (*hstat.local_file))
1893 if (opt.timestamping)
1895 int local_dot_orig_file_exists = 0;
1897 if (opt.backup_converted)
1898 /* If -K is specified, we'll act on the assumption that it was specified
1899 last time these files were downloaded as well, and instead of just
1900 comparing local file X against server file X, we'll compare local
1901 file X.orig (if extant, else X) against server file X. If -K
1902 _wasn't_ specified last time, or the server contains files called
1903 *.orig, -N will be back to not operating correctly with -k. */
1905 /* Would a single s[n]printf() call be faster? --dan
1907 Definitely not. sprintf() is horribly slow. It's a
1908 different question whether the difference between the two
1909 affects a program. Usually I'd say "no", but at one
1910 point I profiled Wget, and found that a measurable and
1911 non-negligible amount of time was lost calling sprintf()
1912 in url.c. Replacing sprintf with inline calls to
1913 strcpy() and long_to_string() made a difference.
1915 memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
1916 memcpy (filename_plus_orig_suffix + filename_len,
1917 ".orig", sizeof (".orig"));
1919 /* Try to stat() the .orig file. */
1920 if (stat (filename_plus_orig_suffix, &st) == 0)
1922 local_dot_orig_file_exists = 1;
1923 local_filename = filename_plus_orig_suffix;
1927 if (!local_dot_orig_file_exists)
1928 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1929 if (stat (*hstat.local_file, &st) == 0)
1930 local_filename = *hstat.local_file;
1932 if (local_filename != NULL)
1933 /* There was a local file, so we'll check later to see if the version
1934 the server has is the same version we already have, allowing us to
1940 /* Modification time granularity is 2 seconds for Windows, so
1941 increase local time by 1 second for later comparison. */
1944 local_size = st.st_size;
1948 /* Reset the counter. */
1950 *dt = 0 | ACCEPTRANGES;
1954 /* Increment the pass counter. */
1956 sleep_between_retrievals (count);
1957 /* Get the current time string. */
1958 tms = time_str (NULL);
1959 /* Print fetch message, if opt.verbose. */
1962 char *hurl = url_string (u, 1);
1966 sprintf (tmp, _("(try:%2d)"), count);
1967 logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
1968 tms, hurl, tmp, locf);
1970 ws_changetitle (hurl, 1);
1975 /* Default document type is empty. However, if spider mode is
1976 on or time-stamping is employed, HEAD_ONLY commands is
1977 encoded within *dt. */
1978 if (opt.spider || (use_ts && !got_head))
1982 /* Assume no restarting. */
1984 /* Decide whether or not to restart. */
1985 if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
1986 /* #### this calls access() and then stat(); could be optimized. */
1987 && file_exists_p (locf))
1988 if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
1989 hstat.restval = st.st_size;
1991 /* In `-c' is used and the file is existing and non-empty,
1992 refuse to truncate it if the server doesn't support continued
1994 hstat.no_truncate = 0;
1995 if (opt.always_rest && hstat.restval)
1996 hstat.no_truncate = 1;
1998 /* Decide whether to send the no-cache directive. We send it in
2000 a) we're using a proxy, and we're past our first retrieval.
2001 Some proxies are notorious for caching incomplete data, so
2002 we require a fresh get.
2003 b) caching is explicitly inhibited. */
2004 if ((proxy && count > 1) /* a */
2005 || !opt.allow_cache /* b */
2007 *dt |= SEND_NOCACHE;
2009 *dt &= ~SEND_NOCACHE;
2011 /* Try fetching the document, or at least its head. */
2012 err = gethttp (u, &hstat, dt, proxy);
2014 /* It's unfortunate that wget determines the local filename before finding
2015 out the Content-Type of the file. Barring a major restructuring of the
2016 code, we need to re-set locf here, since gethttp() may have xrealloc()d
2017 *hstat.local_file to tack on ".html". */
2018 if (!opt.output_document)
2019 locf = *hstat.local_file;
2021 locf = opt.output_document;
2024 tms = time_str (NULL);
2025 /* Get the new location (with or without the redirection). */
2027 *newloc = xstrdup (hstat.newloc);
2030 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2031 case CONERROR: case READERR: case WRITEFAILED:
2033 /* Non-fatal errors continue executing the loop, which will
2034 bring them to "while" statement at the end, to judge
2035 whether the number of tries was exceeded. */
2036 free_hstat (&hstat);
2037 printwhat (count, opt.ntry);
2040 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2041 case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
2042 /* Fatal errors just return from the function. */
2043 free_hstat (&hstat);
2047 case FWRITEERR: case FOPENERR:
2048 /* Another fatal error. */
2049 logputs (LOG_VERBOSE, "\n");
2050 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
2051 *hstat.local_file, strerror (errno));
2052 free_hstat (&hstat);
2057 /* Another fatal error. */
2058 logputs (LOG_VERBOSE, "\n");
2059 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2060 free_hstat (&hstat);
2065 /* Return the new location to the caller. */
2068 logprintf (LOG_NOTQUIET,
2069 _("ERROR: Redirection (%d) without location.\n"),
2071 free_hstat (&hstat);
2075 free_hstat (&hstat);
2080 /* The file was already fully retrieved. */
2081 free_hstat (&hstat);
2086 /* Deal with you later. */
2089 /* All possibilities should have been exhausted. */
2092 if (!(*dt & RETROKF))
2096 /* #### Ugly ugly ugly! */
2097 char *hurl = url_string (u, 1);
2098 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2101 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2102 tms, hstat.statcode, hstat.error);
2103 logputs (LOG_VERBOSE, "\n");
2104 free_hstat (&hstat);
2109 /* Did we get the time-stamp? */
2112 if (opt.timestamping && !hstat.remote_time)
2114 logputs (LOG_NOTQUIET, _("\
2115 Last-modified header missing -- time-stamps turned off.\n"));
2117 else if (hstat.remote_time)
2119 /* Convert the date-string into struct tm. */
2120 tmr = http_atotm (hstat.remote_time);
2121 if (tmr == (time_t) (-1))
2122 logputs (LOG_VERBOSE, _("\
2123 Last-modified header invalid -- time-stamp ignored.\n"));
2127 /* The time-stamping section. */
2132 use_ts = 0; /* no more time-stamping */
2133 count = 0; /* the retrieve count for HEAD is
2135 if (hstat.remote_time && tmr != (time_t) (-1))
2137 /* Now time-stamping can be used validly. Time-stamping
2138 means that if the sizes of the local and remote file
2139 match, and local file is newer than the remote file,
2140 it will not be retrieved. Otherwise, the normal
2141 download procedure is resumed. */
2143 (hstat.contlen == -1 || local_size == hstat.contlen))
2145 logprintf (LOG_VERBOSE, _("\
2146 Server file no newer than local file `%s' -- not retrieving.\n\n"),
2148 free_hstat (&hstat);
2152 else if (tml >= tmr)
2153 logprintf (LOG_VERBOSE, _("\
2154 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
2156 logputs (LOG_VERBOSE,
2157 _("Remote file is newer, retrieving.\n"));
2159 free_hstat (&hstat);
2162 if ((tmr != (time_t) (-1))
2164 && ((hstat.len == hstat.contlen) ||
2165 ((hstat.res == 0) &&
2166 ((hstat.contlen == -1) ||
2167 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
2169 /* #### This code repeats in http.c and ftp.c. Move it to a
2171 const char *fl = NULL;
2172 if (opt.output_document)
2174 if (opt.od_known_regular)
2175 fl = opt.output_document;
2178 fl = *hstat.local_file;
2182 /* End of time-stamping section. */
2186 logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
2191 tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
2193 if (hstat.len == hstat.contlen)
2197 logprintf (LOG_VERBOSE,
2198 _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
2199 tms, tmrate, locf, hstat.len, hstat.contlen);
2200 logprintf (LOG_NONVERBOSE,
2201 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2202 tms, u->url, hstat.len, hstat.contlen, locf, count);
2205 total_downloaded_bytes += hstat.len;
2207 /* Remember that we downloaded the file for later ".orig" code. */
2208 if (*dt & ADDED_HTML_EXTENSION)
2209 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2211 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2213 free_hstat (&hstat);
2217 else if (hstat.res == 0) /* No read error */
2219 if (hstat.contlen == -1) /* We don't know how much we were supposed
2220 to get, so assume we succeeded. */
2224 logprintf (LOG_VERBOSE,
2225 _("%s (%s) - `%s' saved [%ld]\n\n"),
2226 tms, tmrate, locf, hstat.len);
2227 logprintf (LOG_NONVERBOSE,
2228 "%s URL:%s [%ld] -> \"%s\" [%d]\n",
2229 tms, u->url, hstat.len, locf, count);
2232 total_downloaded_bytes += hstat.len;
2234 /* Remember that we downloaded the file for later ".orig" code. */
2235 if (*dt & ADDED_HTML_EXTENSION)
2236 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2238 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2240 free_hstat (&hstat);
2244 else if (hstat.len < hstat.contlen) /* meaning we lost the
2245 connection too soon */
2247 logprintf (LOG_VERBOSE,
2248 _("%s (%s) - Connection closed at byte %ld. "),
2249 tms, tmrate, hstat.len);
2250 printwhat (count, opt.ntry);
2251 free_hstat (&hstat);
2254 else if (!opt.kill_longer) /* meaning we got more than expected */
2256 logprintf (LOG_VERBOSE,
2257 _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
2258 tms, tmrate, locf, hstat.len, hstat.contlen);
2259 logprintf (LOG_NONVERBOSE,
2260 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
2261 tms, u->url, hstat.len, hstat.contlen, locf, count);
2263 total_downloaded_bytes += hstat.len;
2265 /* Remember that we downloaded the file for later ".orig" code. */
2266 if (*dt & ADDED_HTML_EXTENSION)
2267 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
2269 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
2271 free_hstat (&hstat);
2275 else /* the same, but not accepted */
2277 logprintf (LOG_VERBOSE,
2278 _("%s (%s) - Connection closed at byte %ld/%ld. "),
2279 tms, tmrate, hstat.len, hstat.contlen);
2280 printwhat (count, opt.ntry);
2281 free_hstat (&hstat);
2285 else /* now hstat.res can only be -1 */
2287 if (hstat.contlen == -1)
2289 logprintf (LOG_VERBOSE,
2290 _("%s (%s) - Read error at byte %ld (%s)."),
2291 tms, tmrate, hstat.len, strerror (errno));
2292 printwhat (count, opt.ntry);
2293 free_hstat (&hstat);
2296 else /* hstat.res == -1 and contlen is given */
2298 logprintf (LOG_VERBOSE,
2299 _("%s (%s) - Read error at byte %ld/%ld (%s). "),
2300 tms, tmrate, hstat.len, hstat.contlen,
2302 printwhat (count, opt.ntry);
2303 free_hstat (&hstat);
2310 while (!opt.ntry || (count < opt.ntry));
/* Convert the broken-down time T to time_t, treating the fields of T
   as UTC rather than as local time.

   mktime performs a similar conversion but assumes that struct tm
   (the "broken-down" form of time) is expressed in the local time
   zone, so its result is off by the local UTC offset O.  That offset
   is measured by applying gmtime to the first result and mktime to
   the resulting broken-down time; the difference between the two
   mktime results is O, which is then subtracted from the first
   result.  Schematically:

     mktime(tm)   --> t+o
     gmtime(t+o)  --> tm+o
     mktime(tm+o) --> t+2o
     t+o - (t+2o - t+o) = t

   tm_isdst is forced to 0 on the intermediate value so that mktime
   introduces a consistent (non-DST) offset, since tm and tm+o might
   be on opposite sides of a DST change.  The caller is expected to
   have cleared tm_isdst in T as well.

   Some implementations of mktime return -1 for the nonexistent
   local-time hour at the beginning of DST.  In that event
   mktime(tm - 1hr) + 3600 is used instead; note that this path
   decrements T->tm_hour in place.

   Returns the calendar time, or -1 if mktime or gmtime cannot handle
   the value.

   Note that glibc contains a function of the same purpose named
   `timegm' (reverse of gmtime).  It is not universally available,
   and it is not straightforwardly extractable for use here.  Perhaps
   configure should detect timegm and use it where available.

   Contributed by Roger Beeman <beeman@cisco.com>, with the help of
   Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
   Further improved by Roger with assistance from Edward J. Sabol
   based on input by Jamie Zawinski.  */

static time_t
mktime_from_utc (struct tm *t)
{
  time_t tl, tb;
  struct tm *tg;

  tl = mktime (t);
  if (tl == (time_t) -1)
    {
      /* Possibly the nonexistent hour at the start of DST: retry one
         hour earlier and compensate afterwards.  */
      t->tm_hour--;
      tl = mktime (t);
      if (tl == (time_t) -1)
        return -1;              /* can't deal with output from strptime */
      tl += 3600;
    }

  tg = gmtime (&tl);
  if (tg == NULL)
    return -1;                  /* gmtime could not represent TL */
  tg->tm_isdst = 0;

  tb = mktime (tg);
  if (tb == (time_t) -1)
    {
      tg->tm_hour--;
      tb = mktime (tg);
      if (tb == (time_t) -1)
        return -1;              /* can't deal with output from gmtime */
      tb += 3600;
    }

  /* TB - TL is the measured local offset; remove it from TL.  */
  return tl - (tb - tl);
}
/* Decide whether a strptime() call parsed a complete date.

   P is the value strptime() returned: a pointer to the first
   character it did not consume, or NULL when parsing failed.  After
   skipping whitespace, the parse counts as successful when P is at
   the end of the string, at `GMT', or at a sign followed by a digit
   (a numeric zone such as `+X').

   In extended regexp parlance, the result is 1 if P matches
   "^ *(GMT|[+-][0-9]|$)" and 0 otherwise.  A NULL P yields 0.  */

static int
check_end (const char *p)
{
  if (p == NULL)
    return 0;

  for (; ISSPACE (*p); p++)
    ;

  if (*p == '\0')
    return 1;
  if (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
    return 1;
  if ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1]))
    return 1;

  return 0;
}
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2068 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date
   -- plus a four-digit-year RFC850 variant seen in the wild.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as one would like it to be.  Being based
   on strptime, it always allows shortened months, one-digit days,
   etc., but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   Free and PD code was investigated for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues.  Dr. Marcus Hennecke's atotm(), distributed with phttpd, is
   excellent, but cannot be used because it is not assigned to the
   FSF.  Hence strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* RFC1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* RFC850:  Thursday, 29-Jan-98 22:12:57 */
    "%a, %d-%b-%Y %T",          /* pseudo-RFC850:  Thu, 29-Jan-1998 22:12:57
                                   (google.com uses this for their cookies.) */
    "%a %b %d %T %Y"            /* asctime: Thu Jan 29 22:12:57 1998 */
  };

  int i;

  /* Note that under foreign locales Solaris strptime() fails to
     recognize English dates, which renders this function useless.  We
     solve this by being careful not to affect LC_TIME when
     initializing locale.

     Another solution would be to temporarily set locale to C, invoke
     strptime(), and restore it back.  This is slow and dirty,
     however, and locale support other than LC_MESSAGES can mess other
     things, so we stick with just setting LC_MESSAGES.

     GNU strptime does not have this problem because it recognizes
     both international and local dates.  */

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* strptime stores only the fields named by the format and may
         consult the existing contents of the structure.  Start every
         attempt from an all-zero struct so that neither indeterminate
         stack data nor fields left behind by a previous, partially
         matched format can influence the result.  This also leaves
         tm_isdst at 0, which mktime_from_utc relies on and which
         strptime won't set by itself (per Roger Beeman).  */
      memset (&t, 0, sizeof (t));

      if (check_end (strptime (time_string, time_formats[i], &t)))
        return mktime_from_utc (&t);
    }

  /* All formats have failed.  */
  return -1;
}
/* Authorization support: We support two authorization schemes:

   * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
   consisting of answering to the server's challenge with the proper
   MD5 digests.  */
/* How many bytes it will take to store LEN bytes in base64.  */
#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))

/* Encode the LENGTH bytes starting at S to base64 and place the
   result in STORE.  STORE will be 0-terminated, and must point to a
   writable buffer of at least 1+BASE64_LENGTH(length) bytes.

   S does not need to be 0-terminated: only the first LENGTH bytes are
   read, and a final group that is shorter than three bytes is treated
   as if it were padded with zero bits, then marked with `='.  Bytes
   are handled as unsigned values, so input with the high bit set is
   encoded correctly regardless of whether plain char is signed.  A
   negative LENGTH is treated as zero.  */

static void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  const unsigned char *in = (const unsigned char *) s;
  char *p = store;
  int i;

  if (length < 0)
    length = 0;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (i = 0; i < length; i += 3)
    {
      /* Never look past the end of the input; absent bytes count as 0.  */
      unsigned int b0 = in[i];
      unsigned int b1 = (i + 1 < length) ? in[i + 1] : 0;
      unsigned int b2 = (i + 2 < length) ? in[i + 2] : 0;

      *p++ = tbl[b0 >> 2];
      *p++ = tbl[((b0 & 3) << 4) | (b1 >> 4)];
      *p++ = tbl[((b1 & 0xf) << 2) | (b2 >> 6)];
      *p++ = tbl[b2 & 0x3f];
    }

  /* Pad the result if necessary...  */
  if (i == length + 1)
    *(p - 1) = '=';
  else if (i == length + 2)
    *(p - 1) = *(p - 2) = '=';

  /* ...and zero-terminate it.  */
  *p = '\0';
}
/* Build the credentials string for the `Basic' authentication scheme.
   The string `USER:PASSWD' is encoded in base64 and `Basic ' is
   prepended to it.  The result is obtained from xmalloc; releasing
   it is left to the caller.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Sizes of the plain `USER:PASSWD' text and of its base64 form,
     both without the terminating zero.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  int encoded_len = BASE64_LENGTH (plain_len);

  /* Both scratch buffers live on the stack and vanish on return.  */
  char *plain = (char *)alloca (plain_len + 1);
  char *encoded = (char *)alloca (encoded_len + 1);
  char *header;

  sprintf (plain, "%s:%s", user, passwd);
  base64_encode (plain, encoded, plain_len);

  /* 6 is the length of "Basic ".  */
  header = (char *)xmalloc (6 + encoded_len + 1);
  sprintf (header, "Basic %s", encoded);

  return header;
}
/* Advance the pointer lvalue X past any whitespace it points at.  X
   is evaluated more than once, so it must be free of side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of a field in such a header.  If the field is the
   one named by ATTR_NAME ("realm", "opaque", and "nonce" are used by
   the current digest authorization code), its quoted value is copied
   with strdupdelim and stored in the (char *) variable RET points
   to; whatever that variable held before is released with
   xfree_null.

   Returns 0 if AU does not begin with ATTR_NAME, a negative value if
   the field is malformed (missing `=', missing opening or closing
   quote, or premature end of string), and otherwise the number of
   bytes parsed by this call, counted up to and including the closing
   quote.  */

static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  size_t name_len = strlen (attr_name);
  const char *value, *closing;

  /* Not the attribute we were asked about.  */
  if (strncmp (au, attr_name, name_len) != 0)
    return 0;

  value = au + name_len;
  if (!*value)
    return -1;

  /* Expect `=', optionally preceded by whitespace.  */
  SKIP_WS (value);
  if (*value != '=')
    return -1;
  if (!*++value)
    return -1;

  /* Expect the opening quote, optionally preceded by whitespace.  */
  SKIP_WS (value);
  if (*value != '\"')
    return -1;
  if (!*++value)
    return -1;

  /* The value runs up to the next quote; without one the field is
     malformed.  */
  closing = strchr (value, '\"');
  if (closing == NULL)
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (value, closing);
  return closing - au + 1;
}
2593 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2594 an array of 16 bytes containing the hash keys, and BUF should be a
2595 buffer of 33 writable characters (32 for hex digits plus one for
2596 zero termination). */
2598 dump_hash (unsigned char *buf, const unsigned char *hash)
2602 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2604 *buf++ = XNUM_TO_digit (*hash >> 4);
2605 *buf++ = XNUM_TO_digit (*hash & 0xf);
2610 /* Take the line apart to find the challenge, and compose a digest
2611 authorization header. See RFC2069 section 2.1.2. */
2613 digest_authentication_encode (const char *au, const char *user,
2614 const char *passwd, const char *method,
2617 static char *realm, *opaque, *nonce;
2622 { "realm", &realm },
2623 { "opaque", &opaque },
2628 realm = opaque = nonce = NULL;
2630 au += 6; /* skip over `Digest' */
2636 for (i = 0; i < countof (options); i++)
2638 int skip = extract_header_attr (au, options[i].name,
2639 options[i].variable);
2643 xfree_null (opaque);
2653 if (i == countof (options))
2655 while (*au && *au != '=')
2663 while (*au && *au != '\"')
2670 while (*au && *au != ',')
2675 if (!realm || !nonce || !user || !passwd || !path || !method)
2678 xfree_null (opaque);
2683 /* Calculate the digest value. */
2685 ALLOCA_MD5_CONTEXT (ctx);
2686 unsigned char hash[MD5_HASHLEN];
2687 unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2688 unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2690 /* A1BUF = H(user ":" realm ":" password) */
2692 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2693 gen_md5_update ((unsigned char *)":", 1, ctx);
2694 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2695 gen_md5_update ((unsigned char *)":", 1, ctx);
2696 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2697 gen_md5_finish (ctx, hash);
2698 dump_hash (a1buf, hash);
2700 /* A2BUF = H(method ":" path) */
2702 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2703 gen_md5_update ((unsigned char *)":", 1, ctx);
2704 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2705 gen_md5_finish (ctx, hash);
2706 dump_hash (a2buf, hash);
2708 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2710 gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2711 gen_md5_update ((unsigned char *)":", 1, ctx);
2712 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2713 gen_md5_update ((unsigned char *)":", 1, ctx);
2714 gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2715 gen_md5_finish (ctx, hash);
2716 dump_hash (response_digest, hash);
2718 res = (char*) xmalloc (strlen (user)
2723 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2724 + (opaque ? strlen (opaque) : 0)
2726 sprintf (res, "Digest \
2727 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2728 user, realm, nonce, path, response_digest);
2731 char *p = res + strlen (res);
2732 strcat (p, ", opaque=\"");
2739 #endif /* USE_DIGEST */
2742 #define BEGINS_WITH(line, string_constant) \
2743 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
2744 && (ISSPACE (line[sizeof (string_constant) - 1]) \
2745 || !line[sizeof (string_constant) - 1]))
2748 known_authentication_scheme_p (const char *au)
2750 return BEGINS_WITH (au, "Basic")
2751 || BEGINS_WITH (au, "Digest")
2752 || BEGINS_WITH (au, "NTLM");
2757 /* Create the HTTP authorization request header. When the
2758 `WWW-Authenticate' response header is seen, according to the
2759 authorization scheme specified in that header (`Basic' and `Digest'
2760 are supported by the current implementation), produce an
2761 appropriate HTTP authorization request header. */
2763 create_authorization_line (const char *au, const char *user,
2764 const char *passwd, const char *method,
2767 if (0 == strncasecmp (au, "Basic", 5))
2768 return basic_authentication_encode (user, passwd);
2770 if (0 == strncasecmp (au, "Digest", 6))
2771 return digest_authentication_encode (au, user, passwd, method, path);
2772 #endif /* USE_DIGEST */