2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
35 #include <sys/types.h>
46 #if TIME_WITH_SYS_TIME
47 # include <sys/time.h>
51 # include <sys/time.h>
70 # include "gen_sslfunc.h"
78 extern char *version_string;
79 extern LARGE_INT total_downloaded_bytes;
82 static int cookies_loaded_p;
83 struct cookie_jar *wget_cookie_jar;
85 #define TEXTHTML_S "text/html"
86 #define TEXTXHTML_S "application/xhtml+xml"
87 #define HTTP_ACCEPT "*/*"
89 /* Some status code validation macros: */
90 #define H_20X(x) (((x) >= 200) && ((x) < 300))
91 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
92 #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
93 || (x) == HTTP_STATUS_MOVED_TEMPORARILY \
94 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
96 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
98 #define HTTP_STATUS_OK 200
99 #define HTTP_STATUS_CREATED 201
100 #define HTTP_STATUS_ACCEPTED 202
101 #define HTTP_STATUS_NO_CONTENT 204
102 #define HTTP_STATUS_PARTIAL_CONTENTS 206
104 /* Redirection 3xx. */
105 #define HTTP_STATUS_MULTIPLE_CHOICES 300
106 #define HTTP_STATUS_MOVED_PERMANENTLY 301
107 #define HTTP_STATUS_MOVED_TEMPORARILY 302
108 #define HTTP_STATUS_NOT_MODIFIED 304
109 #define HTTP_STATUS_TEMPORARY_REDIRECT 307
111 /* Client error 4xx. */
112 #define HTTP_STATUS_BAD_REQUEST 400
113 #define HTTP_STATUS_UNAUTHORIZED 401
114 #define HTTP_STATUS_FORBIDDEN 403
115 #define HTTP_STATUS_NOT_FOUND 404
117 /* Server errors 5xx. */
118 #define HTTP_STATUS_INTERNAL 500
119 #define HTTP_STATUS_NOT_IMPLEMENTED 501
120 #define HTTP_STATUS_BAD_GATEWAY 502
121 #define HTTP_STATUS_UNAVAILABLE 503
124 /* Parse the HTTP status line, which is of format:
126 HTTP-Version SP Status-Code SP Reason-Phrase
128 The function returns the status-code, or -1 if the status line is
129 malformed. A pointer to the reason-phrase is stored in *REASON_PHRASE_PTR. */
131 parse_http_status_line (const char *line, const char **reason_phrase_ptr)
133 /* (the variables must not be named `major' and `minor', because
134 that breaks compilation with SunOS4 cc.) */
135 int mjr, mnr, statcode;
138 *reason_phrase_ptr = NULL;
140 /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is
141 major version, and Y is minor version. */
142 if (strncmp (line, "HTTP/", 5) != 0)
146 /* Calculate major HTTP version. */
148 for (mjr = 0; ISDIGIT (*line); line++)
149 mjr = 10 * mjr + (*line - '0');
150 if (*line != '.' || p == line)
154 /* Calculate minor HTTP version. */
156 for (mnr = 0; ISDIGIT (*line); line++)
157 mnr = 10 * mnr + (*line - '0');
158 if (*line != ' ' || p == line)
160 /* Wget will accept only 1.0 and higher HTTP-versions. The value of
161 minor version can be safely ignored. */
166 /* Calculate status code. */
167 if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2])))
169 statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0');
171 /* Set up the reason phrase pointer. */
173 /* RFC2068 requires SPC here, but we allow the string to finish
174 here, in case no reason-phrase is present. */
178 *reason_phrase_ptr = line;
183 *reason_phrase_ptr = line + 1;
188 #define WMIN(x, y) ((x) > (y) ? (y) : (x))
190 /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
191 PROMISED_SIZE bytes are sent over the wire -- if the file is
192 longer, read only that much; if the file is shorter, report an error. */
195 post_file (int sock, const char *file_name, long promised_size)
197 static char chunk[8192];
202 DEBUGP (("[writing POST file %s ... ", file_name));
204 fp = fopen (file_name, "rb");
207 while (!feof (fp) && written < promised_size)
210 int length = fread (chunk, 1, sizeof (chunk), fp);
213 towrite = WMIN (promised_size - written, length);
214 write_error = xwrite (sock, chunk, towrite, -1);
224 /* If we've written less than was promised, report a (probably
225 nonsensical) error rather than break the promise. */
226 if (written < promised_size)
232 assert (written == promised_size);
233 DEBUGP (("done]\n"));
237 /* Functions to be used as arguments to header_process(): */
239 struct http_process_range_closure {
245 /* Parse the `Content-Range' header and extract the information it
246 contains. Returns 1 if successful, -1 otherwise. */
248 http_process_range (const char *hdr, void *arg)
250 struct http_process_range_closure *closure
251 = (struct http_process_range_closure *)arg;
254 /* Certain versions of Nutscape proxy server send out
255 `Content-Range' without "bytes" specifier, which is a breach of
256 RFC2068 (as well as the HTTP/1.1 draft which was current at the
257 time). But hell, I must support it... */
258 if (!strncasecmp (hdr, "bytes", 5))
261 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
265 hdr += skip_lws (hdr);
271 for (num = 0; ISDIGIT (*hdr); hdr++)
272 num = 10 * num + (*hdr - '0');
273 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
275 closure->first_byte_pos = num;
277 for (num = 0; ISDIGIT (*hdr); hdr++)
278 num = 10 * num + (*hdr - '0');
279 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
281 closure->last_byte_pos = num;
283 for (num = 0; ISDIGIT (*hdr); hdr++)
284 num = 10 * num + (*hdr - '0');
285 closure->entity_length = num;
289 /* Store 1 in ARG if HDR contains the word "none", 0 otherwise.
290 Used for `Accept-Ranges'. */
292 http_process_none (const char *hdr, void *arg)
294 int *where = (int *)arg;
296 if (strstr (hdr, "none"))
303 /* Store in ARG a malloc-ed copy of HDR up to (not including) the first `;'. */
305 http_process_type (const char *hdr, void *arg)
307 char **result = (char **)arg;
308 /* Locate P on `;' or the terminating zero, whichever comes first. */
309 const char *p = strchr (hdr, ';');
311 p = hdr + strlen (hdr);
312 while (p > hdr && ISSPACE (*(p - 1)))
314 *result = strdupdelim (hdr, p);
318 /* Check whether the `Connection' header is set to "keep-alive". */
320 http_process_connection (const char *hdr, void *arg)
322 int *flag = (int *)arg;
323 if (!strcasecmp (hdr, "Keep-Alive"))
328 /* Commit the cookie to the cookie jar. */
331 http_process_set_cookie (const char *hdr, void *arg)
333 struct url *u = (struct url *)arg;
335 /* The jar should have been created by now. */
336 assert (wget_cookie_jar != NULL);
338 cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, hdr);
343 /* Persistent connections. Currently, we cache the most recently used
344 connection as persistent, provided that the HTTP server agrees to
345 make it such. The persistence data is stored in the variables
346 below. Ideally, it should be possible to cache an arbitrary fixed
347 number of these connections. */
349 /* Whether a persistent connection is active. */
350 static int pconn_active;
353 /* The socket of the connection. */
356 /* Host and port of the currently active persistent connection. */
360 /* Whether an SSL handshake has occurred on this connection. */
364 /* Mark the persistent connection as invalid and free the resources it
365 uses. This is used by the CLOSE_* macros after they forcefully
366 close a registered persistent connection. */
369 invalidate_persistent (void)
371 DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
373 xclose (pconn.socket);
378 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
379 persistent. This will enable someone to use the same connection
380 later. In the context of HTTP, this must be called only AFTER the
381 response has been received and the server has promised that the
382 connection will remain alive.
384 If a previous connection was persistent, it is closed. */
387 register_persistent (const char *host, int port, int fd, int ssl)
391 if (pconn.socket == fd)
393 /* The connection FD is already registered. */
398 /* The old persistent connection is still active; close it
399 first. This situation arises whenever a persistent
400 connection exists, but we then connect to a different
401 host, and try to register a persistent connection to that
403 invalidate_persistent ();
409 pconn.host = xstrdup (host);
413 DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
416 /* Return non-zero if a persistent connection is available for
417 connecting to HOST:PORT. */
420 persistent_available_p (const char *host, int port, int ssl,
421 int *host_lookup_failed)
423 /* First, check whether a persistent connection is active at all. */
427 /* If we want SSL and the last connection wasn't or vice versa,
428 don't use it. Checking for host and port is not enough because
429 HTTP and HTTPS can apparently coexist on the same port. */
430 if (ssl != pconn.ssl)
433 /* If we're not connecting to the same port, we're not interested. */
434 if (port != pconn.port)
437 /* If the host is the same, we're in business. If not, there is
438 still hope -- read below. */
439 if (0 != strcasecmp (host, pconn.host))
441 /* If pconn.socket is already talking to HOST, we needn't
442 reconnect. This happens often when both sites are virtual
443 hosts distinguished only by name and served by the same
444 network interface, and hence the same web server (possibly
445 set up by the ISP and serving many different web sites).
446 This admittedly non-standard optimization does not contradict
447 HTTP and works well with popular server software. */
451 struct address_list *al;
454 /* Don't try to talk to two different SSL sites over the same
455 secure connection! (Besides, it's not clear if name-based
456 virtual hosting is even possible with SSL.) */
459 /* If pconn.socket's peer is one of the IP addresses HOST
460 resolves to, pconn.socket is for all intents and purposes
461 already talking to HOST. */
463 if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
465 /* Can't get the peer's address -- something must be very
466 wrong with the connection. */
467 invalidate_persistent ();
470 al = lookup_host (host, 0);
473 *host_lookup_failed = 1;
477 found = address_list_contains (al, &ip);
478 address_list_release (al);
483 /* The persistent connection's peer address was found among the
484 addresses HOST resolved to; therefore, pconn.socket is in fact
485 already talking to HOST -- no need to reconnect. */
488 /* Finally, check whether the connection is still open. This is
489 important because most servers implement a liberal (short) timeout
490 on persistent connections. Wget can of course always reconnect
491 if the connection doesn't work out, but it's nicer to know in
492 advance. This test is a logical followup of the first test, but
493 is "expensive" and therefore placed at the end of the list. */
495 if (!test_socket_open (pconn.socket))
497 /* Oops, the socket is no longer open. Now that we know that,
498 let's invalidate the persistent connection before returning
500 invalidate_persistent ();
507 /* The idea behind these two CLOSE macros is to distinguish between
508 two cases: one when the job we've been doing is finished, and we
509 want to close the connection and leave, and two when something is
510 seriously wrong and we're closing the connection as part of
513 In case of keep_alive, CLOSE_FINISH should leave the connection
514 open, while CLOSE_INVALIDATE should still close it.
516 Note that the semantics of the flag `keep_alive' is "this
517 connection *will* be reused (the server has promised not to close
518 the connection once we're done)", while the semantics of
519 `pconn_active && (fd) == pconn.socket' is "we're *now* using an
520 active, registered connection". */
522 #define CLOSE_FINISH(fd) do { \
525 if (pconn_active && (fd) == pconn.socket) \
526 invalidate_persistent (); \
532 #define CLOSE_INVALIDATE(fd) do { \
533 if (pconn_active && (fd) == pconn.socket) \
534 invalidate_persistent (); \
541 long len; /* received length */
542 long contlen; /* expected length */
543 long restval; /* the restart value */
544 int res; /* the result of last read */
545 char *newloc; /* new location (redirection) */
546 char *remote_time; /* remote time-stamp string */
547 char *error; /* textual HTTP error */
548 int statcode; /* status code */
549 double dltime; /* time of the download in msecs */
550 int no_truncate; /* whether truncating the file is
552 const char *referer; /* value of the referer header. */
553 char **local_file; /* local file. */
557 free_hstat (struct http_stat *hs)
559 xfree_null (hs->newloc);
560 xfree_null (hs->remote_time);
561 xfree_null (hs->error);
563 /* Guard against being called twice. */
565 hs->remote_time = NULL;
569 static char *create_authorization_line PARAMS ((const char *, const char *,
570 const char *, const char *,
572 static char *basic_authentication_encode PARAMS ((const char *, const char *,
574 static int known_authentication_scheme_p PARAMS ((const char *));
576 time_t http_atotm PARAMS ((const char *));
578 #define BEGINS_WITH(line, string_constant) \
579 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
580 && (ISSPACE (line[sizeof (string_constant) - 1]) \
581 || !line[sizeof (string_constant) - 1]))
583 /* Retrieve a document through HTTP protocol. It recognizes status
584 code, and correctly handles redirections. It closes the network
585 socket. If it receives an error from the functions below it, it
586 will print it if there is enough information to do so (almost
587 always), returning the error to the caller (i.e. http_loop).
589 Various HTTP parameters are stored to hs.
591 If PROXY is non-NULL, the connection will be made to the proxy
592 server, and u->url will be requested. */
594 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
596 char *request, *type, *command, *full_path;
598 char *pragma_h, *referer, *useragent, *range, *wwwauth;
599 char *authenticate_h;
603 char *request_keep_alive;
604 int sock, hcount, all_length, statcode;
606 long contlen, contrange;
609 int auth_tried_already;
612 char *cookies = NULL;
614 /* Whether this connection will be kept alive after the HTTP request
618 /* Flags that detect the two ways of specifying HTTP keep-alive
620 int http_keep_alive_1, http_keep_alive_2;
622 /* Whether keep-alive should be inhibited. */
623 int inhibit_keep_alive;
625 /* Whether we need to print the host header with square brackets around host,
626 e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual
627 "Host: symbolic-name:1234". */
628 int squares_around_host = 0;
630 /* Headers sent when using POST. */
631 char *post_content_type, *post_content_length;
632 long post_data_size = 0;
634 int host_lookup_failed;
637 /* Initialize the SSL context. After the first run, this is a
641 case SSLERRCTXCREATE:
643 logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
644 return SSLERRCTXCREATE;
646 /* try without certfile */
647 logprintf (LOG_NOTQUIET,
648 _("Failed to load certificates from %s\n"),
650 logprintf (LOG_NOTQUIET,
651 _("Trying without the specified certificate\n"));
654 logprintf (LOG_NOTQUIET,
655 _("Failed to get certificate key from %s\n"),
657 logprintf (LOG_NOTQUIET,
658 _("Trying without the specified certificate\n"));
663 #endif /* HAVE_SSL */
665 if (!(*dt & HEAD_ONLY))
666 /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
667 know the local filename so we can save to it. */
668 assert (*hs->local_file != NULL);
671 auth_tried_already = 0;
673 inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
676 /* We need to come back here when the initial attempt to retrieve
677 without authorization header fails. (Expected to happen at least
678 for the Digest authorization scheme.) */
681 http_keep_alive_1 = http_keep_alive_2 = 0;
683 post_content_type = NULL;
684 post_content_length = NULL;
686 /* Initialize certain elements of struct http_stat. */
691 hs->remote_time = NULL;
694 /* If we're using a proxy, we will be connecting to the proxy
696 conn = proxy ? proxy : u;
698 host_lookup_failed = 0;
700 /* First: establish the connection. */
701 if (inhibit_keep_alive
702 || !persistent_available_p (conn->host, conn->port,
704 u->scheme == SCHEME_HTTPS
708 , &host_lookup_failed))
710 /* In its current implementation, persistent_available_p will
711 look up conn->host in some cases. If that lookup failed, we
712 don't need to bother with connect_to_host. */
713 if (host_lookup_failed)
716 sock = connect_to_host (conn->host, conn->port);
720 return (retryable_socket_connect_error (errno)
721 ? CONERROR : CONIMPOSSIBLE);
724 if (conn->scheme == SCHEME_HTTPS)
726 if (!ssl_connect (sock))
728 logputs (LOG_VERBOSE, "\n");
729 logprintf (LOG_NOTQUIET,
730 _("Unable to establish SSL connection.\n"));
736 #endif /* HAVE_SSL */
740 logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
741 pconn.host, pconn.port);
743 using_ssl = pconn.ssl;
744 DEBUGP (("Reusing fd %d.\n", sock));
749 else if (opt.post_file_name || opt.post_data)
757 referer = (char *)alloca (9 + strlen (hs->referer) + 3);
758 sprintf (referer, "Referer: %s\r\n", hs->referer);
761 if (*dt & SEND_NOCACHE)
762 pragma_h = "Pragma: no-cache\r\n";
768 range = (char *)alloca (13 + numdigit (hs->restval) + 4);
769 /* Gag me! Some servers (e.g. WebSitePro) have been known to
770 respond to the following `Range' format by generating a
771 multipart/x-byte-ranges MIME document! This MIME type was
772 present in an old draft of the byteranges specification.
773 HTTP/1.1 specifies a multipart/byte-ranges MIME type, but
774 only if multiple non-overlapping ranges are requested --
775 which Wget never does. */
776 sprintf (range, "Range: bytes=%ld-\r\n", hs->restval);
781 STRDUP_ALLOCA (useragent, opt.useragent);
784 useragent = (char *)alloca (10 + strlen (version_string));
785 sprintf (useragent, "Wget/%s", version_string);
787 /* Construct the authentication, if userid is present. */
790 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
791 user = user ? user : opt.http_user;
792 passwd = passwd ? passwd : opt.http_passwd;
799 /* We have the username and the password, but haven't tried
800 any authorization yet. Let's see if the "Basic" method
801 works. If not, we'll come back here and construct a
802 proper authorization method with the right challenges.
804 If we didn't employ this kind of logic, every URL that
805 requires authorization would have to be processed twice,
806 which is very suboptimal and generates a bunch of false
807 "unauthorized" errors in the server log.
809 #### But this logic also has a serious problem when used
810 with stronger authentications: we *first* transmit the
811 username and the password in clear text, and *then*
812 attempt a stronger authentication scheme. That cannot be
813 right! We are only fortunate that almost everyone still
814 uses the `Basic' scheme anyway.
816 There should be an option to prevent this from happening,
817 for those who use strong authentication schemes and value
819 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
823 /* Use the full path, i.e. one that includes the leading
824 slash and the query string, but is independent of proxy
826 char *pth = url_full_path (u);
827 wwwauth = create_authorization_line (authenticate_h, user, passwd,
836 char *proxy_user, *proxy_passwd;
837 /* For normal username and password, URL components override
838 command-line/wgetrc parameters. With proxy authentication,
839 it's the reverse, because proxy URLs are normally the
840 "permanent" ones, so command-line args should take
842 if (opt.proxy_user && opt.proxy_passwd)
844 proxy_user = opt.proxy_user;
845 proxy_passwd = opt.proxy_passwd;
849 proxy_user = proxy->user;
850 proxy_passwd = proxy->passwd;
852 /* #### This does not appear right. Can't the proxy request,
853 say, `Digest' authentication? */
854 if (proxy_user && proxy_passwd)
855 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
856 "Proxy-Authorization");
859 /* String of the form :PORT. Used only for non-standard ports. */
861 if (u->port != scheme_default_port (u->scheme))
863 port_maybe = (char *)alloca (numdigit (u->port) + 2);
864 sprintf (port_maybe, ":%d", u->port);
867 if (!inhibit_keep_alive)
868 request_keep_alive = "Connection: Keep-Alive\r\n";
870 request_keep_alive = NULL;
873 cookies = cookie_header (wget_cookie_jar, u->host, u->port, u->path,
875 u->scheme == SCHEME_HTTPS
881 if (opt.post_data || opt.post_file_name)
883 post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n";
885 post_data_size = strlen (opt.post_data);
888 post_data_size = file_size (opt.post_file_name);
889 if (post_data_size == -1)
891 logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
896 post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1);
897 sprintf (post_content_length,
898 "Content-Length: %ld\r\n", post_data_size);
902 full_path = xstrdup (u->url);
904 /* Use the full path, i.e. one that includes the leading slash and
905 the query string. E.g. if u->path is "foo/bar" and u->query is
906 "param=value", full_path will be "/foo/bar?param=value". */
907 full_path = url_full_path (u);
909 if (strchr (u->host, ':'))
910 squares_around_host = 1;
912 /* Allocate the memory for the request. */
913 request = (char *)alloca (strlen (command)
917 + (port_maybe ? strlen (port_maybe) : 0)
918 + strlen (HTTP_ACCEPT)
919 + (request_keep_alive
920 ? strlen (request_keep_alive) : 0)
921 + (referer ? strlen (referer) : 0)
922 + (cookies ? strlen (cookies) : 0)
923 + (wwwauth ? strlen (wwwauth) : 0)
924 + (proxyauth ? strlen (proxyauth) : 0)
925 + (range ? strlen (range) : 0)
928 ? strlen (post_content_type) : 0)
929 + (post_content_length
930 ? strlen (post_content_length) : 0)
931 + (opt.user_header ? strlen (opt.user_header) : 0)
933 /* Construct the request. */
939 %s%s%s%s%s%s%s%s%s%s\r\n",
942 squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "",
943 port_maybe ? port_maybe : "",
945 request_keep_alive ? request_keep_alive : "",
946 referer ? referer : "",
947 cookies ? cookies : "",
948 wwwauth ? wwwauth : "",
949 proxyauth ? proxyauth : "",
952 post_content_type ? post_content_type : "",
953 post_content_length ? post_content_length : "",
954 opt.user_header ? opt.user_header : "");
955 DEBUGP (("\n---request begin---\n%s", request));
957 /* Free the temporary memory. */
958 xfree_null (wwwauth);
959 xfree_null (proxyauth);
960 xfree_null (cookies);
963 /* Send the request to server. */
964 write_error = xwrite (sock, request, strlen (request), -1);
966 if (write_error >= 0)
970 DEBUGP (("[POST data: %s]\n", opt.post_data));
971 write_error = xwrite (sock, opt.post_data, post_data_size, -1);
973 else if (opt.post_file_name && post_data_size != 0)
974 write_error = post_file (sock, opt.post_file_name, post_data_size);
976 DEBUGP (("---request end---\n"));
980 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
982 CLOSE_INVALIDATE (sock);
985 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
986 proxy ? "Proxy" : "HTTP");
987 contlen = contrange = -1;
992 /* Before reading anything, initialize the rbuf. */
993 rbuf_initialize (&rbuf, sock);
997 DEBUGP (("\n---response begin---\n"));
999 /* Header-fetching loop. */
1007 /* Get the header. */
1008 status = header_get (&rbuf, &hdr,
1009 /* Disallow continuations for status line. */
1010 (hcount == 1 ? HG_NO_CONTINUATIONS : HG_NONE));
1012 /* Check for errors. */
1013 if (status == HG_EOF && *hdr)
1015 /* This used to be an unconditional error, but that was
1016 somewhat controversial, because of a large number of
1017 broken CGI's that happily "forget" to send the second EOL
1018 before closing the connection of a HEAD request.
1020 So, the deal is to check whether the header is empty
1021 (*hdr is zero if it is); if yes, it means that the
1022 previous header was fully retrieved, and that -- most
1023 probably -- the request is complete. "...be liberal in
1024 what you accept." Oh boy. */
1025 logputs (LOG_VERBOSE, "\n");
1026 logputs (LOG_NOTQUIET, _("End of file while parsing headers.\n"));
1029 xfree_null (all_headers);
1030 CLOSE_INVALIDATE (sock);
1033 else if (status == HG_ERROR)
1035 logputs (LOG_VERBOSE, "\n");
1036 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1040 xfree_null (all_headers);
1041 CLOSE_INVALIDATE (sock);
1045 /* If the headers are to be saved to a file later, save them to
1047 if (opt.save_headers)
1049 int lh = strlen (hdr);
1050 all_headers = (char *)xrealloc (all_headers, all_length + lh + 2);
1051 memcpy (all_headers + all_length, hdr, lh);
1053 all_headers[all_length++] = '\n';
1054 all_headers[all_length] = '\0';
1057 /* Check for status line. */
1061 /* Parse the first line of server response. */
1062 statcode = parse_http_status_line (hdr, &error);
1063 hs->statcode = statcode;
1064 /* Store the descriptive response. */
1065 if (statcode == -1) /* malformed response */
1067 /* A common reason for "malformed response" error is the
1068 case when no data was actually received. Handle this
1071 hs->error = xstrdup (_("No data received"));
1073 hs->error = xstrdup (_("Malformed status line"));
1078 hs->error = xstrdup (_("(no description)"));
1080 hs->error = xstrdup (error);
1082 if ((statcode != -1)
1088 if (opt.server_response)
1089 logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
1091 logprintf (LOG_VERBOSE, "%2d %s", statcode, error);
1097 /* Exit on empty header. */
1104 /* Print the header if requested. */
1105 if (opt.server_response && hcount != 1)
1106 logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
1108 /* Try getting content-length. */
1109 if (contlen == -1 && !opt.ignore_length)
1110 if (header_process (hdr, "Content-Length", header_extract_number,
1113 /* Try getting content-type. */
1115 if (header_process (hdr, "Content-Type", http_process_type, &type))
1117 /* Try getting location. */
1119 if (header_process (hdr, "Location", header_strdup, &hs->newloc))
1121 /* Try getting last-modified. */
1122 if (!hs->remote_time)
1123 if (header_process (hdr, "Last-Modified", header_strdup,
1126 /* Try getting cookies. */
1128 if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u))
1130 /* Try getting www-authentication. */
1131 if (!authenticate_h)
1132 if (header_process (hdr, "WWW-Authenticate", header_strdup,
1135 /* Check for accept-ranges header. If it contains the word
1136 `none', disable the ranges. */
1137 if (*dt & ACCEPTRANGES)
1140 if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep))
1143 *dt &= ~ACCEPTRANGES;
1147 /* Try getting content-range. */
1148 if (contrange == -1)
1150 struct http_process_range_closure closure;
1151 if (header_process (hdr, "Content-Range", http_process_range, &closure))
1153 contrange = closure.first_byte_pos;
1157 /* Check for keep-alive related responses. */
1158 if (!inhibit_keep_alive)
1160 /* Check for the `Keep-Alive' header. */
1161 if (!http_keep_alive_1)
1163 if (header_process (hdr, "Keep-Alive", header_exists,
1164 &http_keep_alive_1))
1167 /* Check for `Connection: Keep-Alive'. */
1168 if (!http_keep_alive_2)
1170 if (header_process (hdr, "Connection", http_process_connection,
1171 &http_keep_alive_2))
1178 DEBUGP (("---response end---\n"));
1180 logputs (LOG_VERBOSE, "\n");
1183 && (http_keep_alive_1 || http_keep_alive_2))
1185 assert (inhibit_keep_alive == 0);
1189 /* The server has promised that it will not close the connection
1190 when we're done. This means that we can register it. */
1191 register_persistent (conn->host, conn->port, sock, using_ssl);
1193 if ((statcode == HTTP_STATUS_UNAUTHORIZED)
1196 /* Authorization is required. */
1200 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1201 might be more bytes in the body. */
1202 if (auth_tried_already)
1204 /* If we have tried it already, then there is no point
1207 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1208 xfree (authenticate_h);
1211 else if (!known_authentication_scheme_p (authenticate_h))
1213 xfree (authenticate_h);
1214 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1217 else if (BEGINS_WITH (authenticate_h, "Basic"))
1219 /* The authentication scheme is basic, the one we try by
1220 default, and it failed. There's no sense in trying
1226 auth_tried_already = 1;
1230 /* We do not need this anymore. */
1233 xfree (authenticate_h);
1234 authenticate_h = NULL;
1237 /* 20x responses are counted among successful by default. */
1238 if (H_20X (statcode))
1241 /* Return if redirected. */
1242 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1244 /* RFC2068 says that in case of the 300 (multiple choices)
1245 response, the server can output a preferred URL through
1246 `Location' header; otherwise, the request should be treated
1247 like GET. So, if the location is set, it will be a
1248 redirection; otherwise, just proceed normally. */
1249 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1253 logprintf (LOG_VERBOSE,
1254 _("Location: %s%s\n"),
1255 hs->newloc ? hs->newloc : _("unspecified"),
1256 hs->newloc ? _(" [following]") : "");
1257 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1258 might be more bytes in the body. */
1260 xfree_null (all_headers);
1265 /* If content-type is not given, assume text/html. This is because
1266 of the multitude of broken CGI's that "forget" to generate the
1269 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
1270 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
1275 if (opt.html_extension && (*dt & TEXTHTML))
1276 /* -E / --html-extension / html_extension = on was specified, and this is a
1277 text/html file. If some case-insensitive variation on ".htm[l]" isn't
1278 already the file's suffix, tack on ".html". */
1280 char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
1282 if (last_period_in_local_filename == NULL
1283 || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
1284 || 0 == strcasecmp (last_period_in_local_filename, ".html")))
1286 size_t local_filename_len = strlen(*hs->local_file);
1288 *hs->local_file = xrealloc(*hs->local_file,
1289 local_filename_len + sizeof(".html"));
1290 strcpy(*hs->local_file + local_filename_len, ".html");
1292 *dt |= ADDED_HTML_EXTENSION;
1296 if (contrange == -1)
1298 /* We did not get a content-range header. This means that the
1299 server did not honor our `Range' request. Normally, this
1300 means we should reset hs->restval and continue normally. */
1302 /* However, if `-c' is used, we need to be a bit more careful:
1304 1. If `-c' is specified and the file already existed when
1305 Wget was started, it would be a bad idea for us to start
1306 downloading it from scratch, effectively truncating it. I
1307 believe this cannot happen unless `-c' was specified.
1309 2. If `-c' is used on a file that is already fully
1310 downloaded, we're requesting bytes after the end of file,
1311 which can result in server not honoring `Range'. If this is
1312 the case, `Content-Length' will be equal to the length of the
1314 if (opt.always_rest)
1316 /* Check for condition #2. */
1317 if (hs->restval > 0 /* restart was requested. */
1318 && contlen != -1 /* we got content-length. */
1319 && hs->restval >= contlen /* file fully downloaded
1323 logputs (LOG_VERBOSE, _("\
1324 \n The file is already fully retrieved; nothing to do.\n\n"));
1325 /* In case the caller inspects. */
1328 /* Mark as successfully retrieved. */
1331 xfree_null (all_headers);
1332 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1333 might be more bytes in the body. */
1334 return RETRUNNEEDED;
1337 /* Check for condition #1. */
1338 if (hs->no_truncate)
1340 logprintf (LOG_NOTQUIET,
1343 Continued download failed on this file, which conflicts with `-c'.\n\
1344 Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
1346 xfree_null (all_headers);
1347 CLOSE_INVALIDATE (sock);
1348 return CONTNOTSUPPORTED;
1356 else if (contrange != hs->restval ||
1357 (H_PARTIAL (statcode) && contrange == -1))
1359 /* This means the whole request was somehow misunderstood by the
1360 server. Bail out. */
1362 xfree_null (all_headers);
1363 CLOSE_INVALIDATE (sock);
1370 contlen += contrange;
1372 contrange = -1; /* If conent-length was not sent,
1373 content-range will be ignored. */
1375 hs->contlen = contlen;
1379 if ((*dt & RETROKF) && !opt.server_response)
1381 /* No need to print this output if the body won't be
1382 downloaded at all, or if the original server response is
1384 logputs (LOG_VERBOSE, _("Length: "));
1387 logputs (LOG_VERBOSE, legible (contlen));
1388 if (contrange != -1)
1389 logprintf (LOG_VERBOSE, _(" (%s to go)"),
1390 legible (contlen - contrange));
1393 logputs (LOG_VERBOSE,
1394 opt.ignore_length ? _("ignored") : _("unspecified"));
1396 logprintf (LOG_VERBOSE, " [%s]\n", type);
1398 logputs (LOG_VERBOSE, "\n");
1402 type = NULL; /* We don't need it any more. */
1404 /* Return if we have no intention of further downloading. */
1405 if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1407 /* In case the caller cares to look... */
1411 xfree_null (all_headers);
1412 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1413 might be more bytes in the body. */
1414 return RETRFINISHED;
1417 /* Open the local file. */
1420 mkalldirs (*hs->local_file);
1422 rotate_backups (*hs->local_file);
1423 fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
1426 logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1427 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1428 might be more bytes in the body. */
1429 xfree_null (all_headers);
1435 extern int global_download_count;
1437 /* To ensure that repeated "from scratch" downloads work for -O
1438 files, we rewind the file pointer, unless restval is
1439 non-zero. (This works only when -O is used on regular files,
1440 but it's still a valuable feature.)
1442 However, this loses when more than one URL is specified on
1443 the command line: the second rewind eradicates the contents
1444 of the first download. Thus we disable the above trick for
1445 all the downloads except the very first one.
1447 #### A possible solution to this would be to remember the
1448 file position in the output document and to seek to that
1449 position, instead of rewinding.
1451 We don't truncate stdout, since that breaks
1452 "wget -O - [...] >> foo".
1454 if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
1456 /* This will silently fail for streams that don't correspond
1457 to regular files, but that's OK. */
1459 /* ftruncate is needed because opt.dfp is opened in append
1460 mode if opt.always_rest is set. */
1461 ftruncate (fileno (fp), 0);
1466 /* #### This confuses the code that checks for file size. There
1467 should be some overhead information. */
1468 if (opt.save_headers)
1469 fwrite (all_headers, 1, all_length, fp);
1471 /* Get the contents of the document. */
1472 hs->res = get_contents (sock, fp, &hs->len, hs->restval,
1473 (contlen != -1 ? contlen : 0),
1474 &rbuf, keep_alive, &hs->dltime);
1477 CLOSE_FINISH (sock);
1479 CLOSE_INVALIDATE (sock);
1482 /* Close or flush the file. We have to be careful to check for
1483 error here. Checking the result of fwrite() is not enough --
1484 errors could go unnoticed! */
1487 flush_res = fclose (fp);
1489 flush_res = fflush (fp);
1490 if (flush_res == EOF)
1493 xfree_null (all_headers);
1496 return RETRFINISHED;
1499 /* The genuine HTTP loop! This is the part where the retrieval is
1500 retried, and retried, and retried, and... */
1502 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1503 int *dt, struct url *proxy)
1506 int use_ts, got_head = 0; /* time-stamping info */
1507 char *filename_plus_orig_suffix;
1508 char *local_filename = NULL;
1509 char *tms, *locf, *tmrate;
1511 time_t tml = -1, tmr = -1; /* local and remote time-stamps */
1512 long local_size = 0; /* the size of the local file */
1513 size_t filename_len;
1514 struct http_stat hstat; /* HTTP status */
1518 /* This used to be done in main(), but it's a better idea to do it
1519 here so that we don't go through the hoops if we're just using
1523 if (!wget_cookie_jar)
1524 wget_cookie_jar = cookie_jar_new ();
1525 if (opt.cookies_input && !cookies_loaded_p)
1527 cookie_jar_load (wget_cookie_jar, opt.cookies_input);
1528 cookies_loaded_p = 1;
1534 /* Warn on (likely bogus) wildcard usage in HTTP. Don't use
1535 has_wildcards_p because it would also warn on `?', and we know that
1536 shows up in CGI paths a *lot*. */
1537 if (strchr (u->url, '*'))
1538 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1540 /* Determine the local filename. */
1541 if (local_file && *local_file)
1542 hstat.local_file = local_file;
1543 else if (local_file)
1545 *local_file = url_file_name (u);
1546 hstat.local_file = local_file;
1550 dummy = url_file_name (u);
1551 hstat.local_file = &dummy;
1554 if (!opt.output_document)
1555 locf = *hstat.local_file;
1557 locf = opt.output_document;
1559 hstat.referer = referer;
1561 filename_len = strlen (*hstat.local_file);
1562 filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1564 if (opt.noclobber && file_exists_p (*hstat.local_file))
1566 /* If opt.noclobber is turned on and file already exists, do not
1567 retrieve the file */
1568 logprintf (LOG_VERBOSE, _("\
1569 File `%s' already there, will not retrieve.\n"), *hstat.local_file);
1570 /* If the file is there, we suppose it's retrieved OK. */
1573 /* #### Bogusness alert. */
1574 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1575 if (has_html_suffix_p (*hstat.local_file))
1583 if (opt.timestamping)
1585 int local_dot_orig_file_exists = 0;
1587 if (opt.backup_converted)
1588 /* If -K is specified, we'll act on the assumption that it was specified
1589 last time these files were downloaded as well, and instead of just
1590 comparing local file X against server file X, we'll compare local
1591 file X.orig (if extant, else X) against server file X. If -K
1592 _wasn't_ specified last time, or the server contains files called
1593 *.orig, -N will be back to not operating correctly with -k. */
1595 /* Would a single s[n]printf() call be faster? --dan
1597 Definitely not. sprintf() is horribly slow. It's a
1598 different question whether the difference between the two
1599 affects a program. Usually I'd say "no", but at one
1600 point I profiled Wget, and found that a measurable and
1601 non-negligible amount of time was lost calling sprintf()
1602 in url.c. Replacing sprintf with inline calls to
1603 strcpy() and long_to_string() made a difference.
1605 memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
1606 memcpy (filename_plus_orig_suffix + filename_len,
1607 ".orig", sizeof (".orig"));
1609 /* Try to stat() the .orig file. */
1610 if (stat (filename_plus_orig_suffix, &st) == 0)
1612 local_dot_orig_file_exists = 1;
1613 local_filename = filename_plus_orig_suffix;
1617 if (!local_dot_orig_file_exists)
1618 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1619 if (stat (*hstat.local_file, &st) == 0)
1620 local_filename = *hstat.local_file;
1622 if (local_filename != NULL)
1623 /* There was a local file, so we'll check later to see if the version
1624 the server has is the same version we already have, allowing us to
1630 /* Modification time granularity is 2 seconds for Windows, so
1631 increase local time by 1 second for later comparison. */
1634 local_size = st.st_size;
1638 /* Reset the counter. */
1640 *dt = 0 | ACCEPTRANGES;
1644 /* Increment the pass counter. */
1646 sleep_between_retrievals (count);
1647 /* Get the current time string. */
1648 tms = time_str (NULL);
1649 /* Print fetch message, if opt.verbose. */
1652 char *hurl = url_string (u, 1);
1656 sprintf (tmp, _("(try:%2d)"), count);
1657 logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
1658 tms, hurl, tmp, locf);
1660 ws_changetitle (hurl, 1);
1665 /* Default document type is empty. However, if spider mode is
1666 on or time-stamping is employed, the HEAD_ONLY command is
1667 encoded within *dt. */
1668 if (opt.spider || (use_ts && !got_head))
1672 /* Assume no restarting. */
1674 /* Decide whether or not to restart. */
1675 if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
1676 /* #### this calls access() and then stat(); could be optimized. */
1677 && file_exists_p (locf))
1678 if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
1679 hstat.restval = st.st_size;
1681 /* If `-c' is used and the file exists and is non-empty,
1682 refuse to truncate it if the server doesn't support continued
1684 hstat.no_truncate = 0;
1685 if (opt.always_rest && hstat.restval)
1686 hstat.no_truncate = 1;
1688 /* Decide whether to send the no-cache directive. We send it in
1690 a) we're using a proxy, and we're past our first retrieval.
1691 Some proxies are notorious for caching incomplete data, so
1692 we require a fresh get.
1693 b) caching is explicitly inhibited. */
1694 if ((proxy && count > 1) /* a */
1695 || !opt.allow_cache /* b */
1697 *dt |= SEND_NOCACHE;
1699 *dt &= ~SEND_NOCACHE;
1701 /* Try fetching the document, or at least its head. */
1702 err = gethttp (u, &hstat, dt, proxy);
1704 /* It's unfortunate that wget determines the local filename before finding
1705 out the Content-Type of the file. Barring a major restructuring of the
1706 code, we need to re-set locf here, since gethttp() may have xrealloc()d
1707 *hstat.local_file to tack on ".html". */
1708 if (!opt.output_document)
1709 locf = *hstat.local_file;
1711 locf = opt.output_document;
1714 tms = time_str (NULL);
1715 /* Get the new location (with or without the redirection). */
1717 *newloc = xstrdup (hstat.newloc);
1720 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
1721 case CONERROR: case READERR: case WRITEFAILED:
1723 /* Non-fatal errors continue executing the loop, which will
1724 bring them to "while" statement at the end, to judge
1725 whether the number of tries was exceeded. */
1726 free_hstat (&hstat);
1727 printwhat (count, opt.ntry);
1730 case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
1731 case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
1732 /* Fatal errors just return from the function. */
1733 free_hstat (&hstat);
1737 case FWRITEERR: case FOPENERR:
1738 /* Another fatal error. */
1739 logputs (LOG_VERBOSE, "\n");
1740 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
1741 *hstat.local_file, strerror (errno));
1742 free_hstat (&hstat);
1747 /* Another fatal error. */
1748 logputs (LOG_VERBOSE, "\n");
1749 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
1750 free_hstat (&hstat);
1755 /* Return the new location to the caller. */
1758 logprintf (LOG_NOTQUIET,
1759 _("ERROR: Redirection (%d) without location.\n"),
1761 free_hstat (&hstat);
1765 free_hstat (&hstat);
1770 /* The file was already fully retrieved. */
1771 free_hstat (&hstat);
1776 /* Deal with you later. */
1779 /* All possibilities should have been exhausted. */
1782 if (!(*dt & RETROKF))
1786 /* #### Ugly ugly ugly! */
1787 char *hurl = url_string (u, 1);
1788 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
1791 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
1792 tms, hstat.statcode, hstat.error);
1793 logputs (LOG_VERBOSE, "\n");
1794 free_hstat (&hstat);
1799 /* Did we get the time-stamp? */
1802 if (opt.timestamping && !hstat.remote_time)
1804 logputs (LOG_NOTQUIET, _("\
1805 Last-modified header missing -- time-stamps turned off.\n"));
1807 else if (hstat.remote_time)
1809 /* Convert the date-string into struct tm. */
1810 tmr = http_atotm (hstat.remote_time);
1811 if (tmr == (time_t) (-1))
1812 logputs (LOG_VERBOSE, _("\
1813 Last-modified header invalid -- time-stamp ignored.\n"));
1817 /* The time-stamping section. */
1822 use_ts = 0; /* no more time-stamping */
1823 count = 0; /* the retrieve count for HEAD is
1825 if (hstat.remote_time && tmr != (time_t) (-1))
1827 /* Now time-stamping can be used validly. Time-stamping
1828 means that if the sizes of the local and remote file
1829 match, and local file is newer than the remote file,
1830 it will not be retrieved. Otherwise, the normal
1831 download procedure is resumed. */
1833 (hstat.contlen == -1 || local_size == hstat.contlen))
1835 logprintf (LOG_VERBOSE, _("\
1836 Server file no newer than local file `%s' -- not retrieving.\n\n"),
1838 free_hstat (&hstat);
1842 else if (tml >= tmr)
1843 logprintf (LOG_VERBOSE, _("\
1844 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
1846 logputs (LOG_VERBOSE,
1847 _("Remote file is newer, retrieving.\n"));
1849 free_hstat (&hstat);
1852 if ((tmr != (time_t) (-1))
1854 && ((hstat.len == hstat.contlen) ||
1855 ((hstat.res == 0) &&
1856 ((hstat.contlen == -1) ||
1857 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
1859 /* #### This code repeats in http.c and ftp.c. Move it to a
1861 const char *fl = NULL;
1862 if (opt.output_document)
1864 if (opt.od_known_regular)
1865 fl = opt.output_document;
1868 fl = *hstat.local_file;
1872 /* End of time-stamping section. */
1876 logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
1881 tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
1883 if (hstat.len == hstat.contlen)
1887 logprintf (LOG_VERBOSE,
1888 _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
1889 tms, tmrate, locf, hstat.len, hstat.contlen);
1890 logprintf (LOG_NONVERBOSE,
1891 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
1892 tms, u->url, hstat.len, hstat.contlen, locf, count);
1895 total_downloaded_bytes += hstat.len;
1897 /* Remember that we downloaded the file for later ".orig" code. */
1898 if (*dt & ADDED_HTML_EXTENSION)
1899 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1901 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1903 free_hstat (&hstat);
1907 else if (hstat.res == 0) /* No read error */
1909 if (hstat.contlen == -1) /* We don't know how much we were supposed
1910 to get, so assume we succeeded. */
1914 logprintf (LOG_VERBOSE,
1915 _("%s (%s) - `%s' saved [%ld]\n\n"),
1916 tms, tmrate, locf, hstat.len);
1917 logprintf (LOG_NONVERBOSE,
1918 "%s URL:%s [%ld] -> \"%s\" [%d]\n",
1919 tms, u->url, hstat.len, locf, count);
1922 total_downloaded_bytes += hstat.len;
1924 /* Remember that we downloaded the file for later ".orig" code. */
1925 if (*dt & ADDED_HTML_EXTENSION)
1926 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1928 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1930 free_hstat (&hstat);
1934 else if (hstat.len < hstat.contlen) /* meaning we lost the
1935 connection too soon */
1937 logprintf (LOG_VERBOSE,
1938 _("%s (%s) - Connection closed at byte %ld. "),
1939 tms, tmrate, hstat.len);
1940 printwhat (count, opt.ntry);
1941 free_hstat (&hstat);
1944 else if (!opt.kill_longer) /* meaning we got more than expected */
1946 logprintf (LOG_VERBOSE,
1947 _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
1948 tms, tmrate, locf, hstat.len, hstat.contlen);
1949 logprintf (LOG_NONVERBOSE,
1950 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
1951 tms, u->url, hstat.len, hstat.contlen, locf, count);
1953 total_downloaded_bytes += hstat.len;
1955 /* Remember that we downloaded the file for later ".orig" code. */
1956 if (*dt & ADDED_HTML_EXTENSION)
1957 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1959 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1961 free_hstat (&hstat);
1965 else /* the same, but not accepted */
1967 logprintf (LOG_VERBOSE,
1968 _("%s (%s) - Connection closed at byte %ld/%ld. "),
1969 tms, tmrate, hstat.len, hstat.contlen);
1970 printwhat (count, opt.ntry);
1971 free_hstat (&hstat);
1975 else /* now hstat.res can only be -1 */
1977 if (hstat.contlen == -1)
1979 logprintf (LOG_VERBOSE,
1980 _("%s (%s) - Read error at byte %ld (%s)."),
1981 tms, tmrate, hstat.len, strerror (errno));
1982 printwhat (count, opt.ntry);
1983 free_hstat (&hstat);
1986 else /* hstat.res == -1 and contlen is given */
1988 logprintf (LOG_VERBOSE,
1989 _("%s (%s) - Read error at byte %ld/%ld (%s). "),
1990 tms, tmrate, hstat.len, hstat.contlen,
1992 printwhat (count, opt.ntry);
1993 free_hstat (&hstat);
2000 while (!opt.ntry || (count < opt.ntry));
2004 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
2005 than local timezone.
2007 mktime is similar but assumes struct tm, also known as the
2008 "broken-down" form of time, is in local time zone. mktime_from_utc
2009 uses mktime to make the conversion understanding that an offset
2010 will be introduced by the local time assumption.
2012 mktime_from_utc then measures the introduced offset by applying
2013 gmtime to the initial result and applying mktime to the resulting
2014 "broken-down" form. The difference between the two mktime results
2015 is the measured offset which is then subtracted from the initial
2016 mktime result to yield a calendar time which is the value returned.
2018 tm_isdst in struct tm is set to 0 to force mktime to introduce a
2019 consistent offset (the non DST offset) since tm and tm+o might be
2020 on opposite sides of a DST change.
2022 Some implementations of mktime return -1 for the nonexistent
2023 localtime hour at the beginning of DST. In this event, use
2024 mktime(tm - 1hr) + 3600.
2028 gmtime(t+o) --> tm+o
2029 mktime(tm+o) --> t+2o
2030 (t+o) - ((t+2o) - (t+o)) = t
2032 Note that glibc contains a function of the same purpose named
2033 `timegm' (reverse of gmtime). But obviously, it is not universally
2034 available, and unfortunately it is not straightforwardly
2035 extractable for use here. Perhaps configure should detect timegm
2036 and use it where available.
2038 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
2039 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
2040 Further improved by Roger with assistance from Edward J. Sabol
2041 based on input by Jamie Zawinski. */
2044 mktime_from_utc (struct tm *t)
2055 return -1; /* can't deal with output from strptime */
2066 return -1; /* can't deal with output from gmtime */
2069 return (tl - (tb - tl));
2072 /* Check whether the result of strptime() indicates success.
2073 strptime() returns the pointer to how far it got to in the string.
2074 The processing has been successful if the string is at `GMT' or
2075 `+X', or at the end of the string.
2077 In extended regexp parlance, the function returns 1 if P matches
2078 "^ *(GMT|[+-][0-9]|$)", 0 otherwise. P being NULL (which strptime
2079 can return) is considered a failure and 0 is returned. */
2081 check_end (const char *p)
2085 while (ISSPACE (*p))
2088 || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
2089 || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
2095 /* Convert the textual specification of time in TIME_STRING to the
2096 number of seconds since the Epoch.
2098 TIME_STRING can be in any of the three formats RFC2068 allows the
2099 HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date.
2100 Timezones are ignored, and should be GMT.
2102 Return the computed time_t representation, or -1 if the conversion
2105 This function uses strptime with various string formats for parsing
2106 TIME_STRING. This results in a parser that is not as lenient in
2107 interpreting TIME_STRING as I would like it to be. Being based on
2108 strptime, it always allows shortened months, one-digit days, etc.,
2109 but due to the multitude of formats in which time can be
2110 represented, an ideal HTTP time parser would be even more
2111 forgiving. It should completely ignore things like week days and
2112 concentrate only on the various forms of representing years,
2113 months, days, hours, minutes, and seconds. For example, it would
2114 be nice if it accepted ISO 8601 out of the box.
2116 I've investigated free and PD code for this purpose, but none was
2117 usable. getdate was big and unwieldy, and had potential copyright
2118 issues, or so I was informed. Dr. Marcus Hennecke's atotm(),
2119 distributed with phttpd, is excellent, but we cannot use it because
2120 it is not assigned to the FSF. So I stuck with strptime. */
time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *const time_formats[] = {
    "%a, %d %b %Y %T",          /* RFC1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* RFC850:  Thursday, 29-Jan-98 22:12:57 */
    "%a, %d-%b-%Y %T",          /* pseudo-RFC850:  Thu, 29-Jan-1998 22:12:57
                                   (google.com uses this for their cookies.) */
    "%a %b %d %T %Y"            /* asctime: Thu Jan 29 22:12:57 1998 */
  };
  int i;

  /* Note that under foreign locales Solaris strptime() fails to
     recognize English dates, which renders this function useless.  We
     solve this by being careful not to affect LC_TIME when
     initializing locale.

     Another solution would be to temporarily set locale to C, invoke
     strptime(), and restore it back.  This is slow and dirty,
     however, and locale support other than LC_MESSAGES can mess other
     things, so I rather chose to stick with just setting LC_MESSAGES.

     GNU strptime does not have this problem because it recognizes
     both international and local dates.  */

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* Start every attempt from an all-zero struct tm.  strptime()
         only stores the fields named by the format, may leave partial
         results behind when a format fails to match, and some
         implementations consult the existing contents of the
         structure.  Zeroing also sets tm_isdst to 0, which, according
         to Roger Beeman, must be initialized because strptime won't
         do it.  */
      memset (&t, 0, sizeof t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        return mktime_from_utc (&t);
    }

  /* All formats have failed.  */
  return -1;
}
2166 /* Authorization support: We support two authorization schemes:
2168 * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
2170 * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
2171 consisting of answering to the server's challenge with the proper
2174 /* How many bytes it will take to store LEN bytes in base64. */
2175 #define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))
2177 /* Encode the string S of length LENGTH to base64 format and place it
2178 to STORE. STORE will be 0-terminated, and must point to a writable
2179 buffer of at least 1+BASE64_LENGTH(length) bytes. */
static void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  /* Read the input as unsigned bytes: with plain (possibly signed)
     char, a byte >= 0x80 would shift to a negative table index.  */
  const unsigned char *in = (const unsigned char *) s;
  char *p = store;
  int i = 0;

  /* Transform every complete 3x8-bit group to 4x6 bits, as required
     by base64.  */
  while (length - i >= 3)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *p++ = tbl[((in[1] & 0xf) << 2) | (in[2] >> 6)];
      *p++ = tbl[in[2] & 0x3f];
      in += 3;
      i += 3;
    }

  /* Encode the one or two leftover bytes and pad with `='.  Only the
     bytes that are really part of the input are read, so S does not
     have to be followed by readable memory.  */
  if (length - i == 1)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[(in[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
    }
  else if (length - i == 2)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *p++ = tbl[(in[1] & 0xf) << 2];
      *p++ = '=';
    }

  /* ...and zero-terminate it.  */
  *p = '\0';
}
2215 /* Create the authentication header contents for the `Basic' scheme.
2216 This is done by encoding the string `USER:PASS' in base64 and
2217 prepending `HEADER: Basic ' to it. */
static char *
basic_authentication_encode (const char *user, const char *passwd,
                             const char *header)
{
  /* Length of the plain `USER:PASS' string and of its base64 form.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  int encoded_len = BASE64_LENGTH (plain_len);
  char *plain = (char *) alloca (plain_len + 1);
  char *encoded = (char *) alloca (1 + encoded_len);
  char *line;

  sprintf (plain, "%s:%s", user, passwd);
  base64_encode (plain, encoded, plain_len);

  /* 11 == strlen (": Basic ") + strlen ("\r\n") + 1 for the
     terminating zero.  The result is allocated with xmalloc.  */
  line = (char *) xmalloc (encoded_len + 11 + strlen (header));
  sprintf (line, "%s: Basic %s\r\n", header, encoded);

  return line;
}
2237 /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
2238 of a field in such a header. If the field is the one specified by
2239 ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
2240 digest authorization code), extract its value in the (char*)
2241 variable pointed to by RET. Returns negative on a malformed header,
2242 or number of bytes that have been parsed by this call. */
2244 extract_header_attr (const char *au, const char *attr_name, char **ret)
2246 const char *cp, *ep;
2250 if (strncmp (cp, attr_name, strlen (attr_name)) == 0)
2252 cp += strlen (attr_name);
2255 cp += skip_lws (cp);
2260 cp += skip_lws (cp);
2265 for (ep = cp; *ep && *ep != '\"'; ep++)
2270 *ret = strdupdelim (cp, ep);
2277 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2278 an array of 16 bytes containing the hash keys, and BUF should be a
2279 buffer of 33 writable characters (32 for hex digits plus one for
2280 zero termination). */
2282 dump_hash (unsigned char *buf, const unsigned char *hash)
2286 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2288 *buf++ = XNUM_TO_digit (*hash >> 4);
2289 *buf++ = XNUM_TO_digit (*hash & 0xf);
2294 /* Take the line apart to find the challenge, and compose a digest
2295 authorization header. See RFC2069 section 2.1.2. */
2297 digest_authentication_encode (const char *au, const char *user,
2298 const char *passwd, const char *method,
2301 static char *realm, *opaque, *nonce;
2306 { "realm", &realm },
2307 { "opaque", &opaque },
2312 realm = opaque = nonce = NULL;
2314 au += 6; /* skip over `Digest' */
2319 au += skip_lws (au);
2320 for (i = 0; i < countof (options); i++)
2322 int skip = extract_header_attr (au, options[i].name,
2323 options[i].variable);
2327 xfree_null (opaque);
2337 if (i == countof (options))
2339 while (*au && *au != '=')
2343 au += skip_lws (au);
2347 while (*au && *au != '\"')
2354 while (*au && *au != ',')
2359 if (!realm || !nonce || !user || !passwd || !path || !method)
2362 xfree_null (opaque);
2367 /* Calculate the digest value. */
2369 ALLOCA_MD5_CONTEXT (ctx);
2370 unsigned char hash[MD5_HASHLEN];
2371 unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2372 unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2374 /* A1BUF = H(user ":" realm ":" password) */
2376 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2377 gen_md5_update ((unsigned char *)":", 1, ctx);
2378 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2379 gen_md5_update ((unsigned char *)":", 1, ctx);
2380 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2381 gen_md5_finish (ctx, hash);
2382 dump_hash (a1buf, hash);
2384 /* A2BUF = H(method ":" path) */
2386 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2387 gen_md5_update ((unsigned char *)":", 1, ctx);
2388 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2389 gen_md5_finish (ctx, hash);
2390 dump_hash (a2buf, hash);
2392 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2394 gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2395 gen_md5_update ((unsigned char *)":", 1, ctx);
2396 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2397 gen_md5_update ((unsigned char *)":", 1, ctx);
2398 gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2399 gen_md5_finish (ctx, hash);
2400 dump_hash (response_digest, hash);
2402 res = (char*) xmalloc (strlen (user)
2407 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2408 + (opaque ? strlen (opaque) : 0)
2410 sprintf (res, "Authorization: Digest \
2411 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2412 user, realm, nonce, path, response_digest);
2415 char *p = res + strlen (res);
2416 strcat (p, ", opaque=\"");
2420 strcat (res, "\r\n");
2424 #endif /* USE_DIGEST */
/* Nonzero if LINE begins with STRING_CONSTANT, compared without regard
   to case, and the match is followed by whitespace or by the end of
   LINE.  STRING_CONSTANT must be a string literal, because its length
   is taken with sizeof.  */
#define BEGINS_WITH(line, string_constant)                                \
  (strncasecmp (line, string_constant, sizeof (string_constant) - 1) == 0 \
   && (line[sizeof (string_constant) - 1] == '\0'                         \
       || ISSPACE (line[sizeof (string_constant) - 1])))

/* Return 1 if AU starts with the name of one of the authentication
   schemes listed below, 0 otherwise.  */
static int
known_authentication_scheme_p (const char *au)
{
  if (BEGINS_WITH (au, "Basic"))
    return 1;
  if (BEGINS_WITH (au, "Digest"))
    return 1;
  if (BEGINS_WITH (au, "NTLM"))
    return 1;
  return 0;
}
2442 /* Create the HTTP authorization request header. When the
2443 `WWW-Authenticate' response header is seen, according to the
2444 authorization scheme specified in that header (`Basic' and `Digest'
2445 are supported by the current implementation), produce an
2446 appropriate HTTP authorization request header. */
2448 create_authorization_line (const char *au, const char *user,
2449 const char *passwd, const char *method,
2452 char *wwwauth = NULL;
2454 if (!strncasecmp (au, "Basic", 5))
2455 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
2456 if (!strncasecmp (au, "NTLM", 4))
2457 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
2459 else if (!strncasecmp (au, "Digest", 6))
2460 wwwauth = digest_authentication_encode (au, user, passwd, method, path);
2461 #endif /* USE_DIGEST */