2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
25 #include <sys/types.h>
36 #if TIME_WITH_SYS_TIME
37 # include <sys/time.h>
41 # include <sys/time.h>
58 # include "gen_sslfunc.h"
65 extern char *version_string;
71 static int cookies_loaded_p;
73 #define TEXTHTML_S "text/html"
74 #define HTTP_ACCEPT "*/*"
76 /* Some status code validation macros: */
/* H_20X: non-zero when X is a success code, i.e. 200 <= X < 300. */
77 #define H_20X(x) (((x) >= 200) && ((x) < 300))
/* H_PARTIAL: non-zero only when X equals HTTP_STATUS_PARTIAL_CONTENTS. */
78 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
/* H_REDIRECTED: non-zero only when X equals HTTP_STATUS_MOVED_PERMANENTLY
   or HTTP_STATUS_MOVED_TEMPORARILY; other 3xx codes are not matched by
   this macro.  Like H_20X, it expands its argument twice, so X should
   be free of side effects. */
79 #define H_REDIRECTED(x) (((x) == HTTP_STATUS_MOVED_PERMANENTLY) \
80 || ((x) == HTTP_STATUS_MOVED_TEMPORARILY))
82 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
/* Successful 2xx. */
84 #define HTTP_STATUS_OK 200
85 #define HTTP_STATUS_CREATED 201
86 #define HTTP_STATUS_ACCEPTED 202
87 #define HTTP_STATUS_NO_CONTENT 204
88 #define HTTP_STATUS_PARTIAL_CONTENTS 206
90 /* Redirection 3xx. */
91 #define HTTP_STATUS_MULTIPLE_CHOICES 300
92 #define HTTP_STATUS_MOVED_PERMANENTLY 301
93 #define HTTP_STATUS_MOVED_TEMPORARILY 302
94 #define HTTP_STATUS_NOT_MODIFIED 304
96 /* Client error 4xx. */
97 #define HTTP_STATUS_BAD_REQUEST 400
98 #define HTTP_STATUS_UNAUTHORIZED 401
99 #define HTTP_STATUS_FORBIDDEN 403
100 #define HTTP_STATUS_NOT_FOUND 404
102 /* Server errors 5xx. */
103 #define HTTP_STATUS_INTERNAL 500
104 #define HTTP_STATUS_NOT_IMPLEMENTED 501
105 #define HTTP_STATUS_BAD_GATEWAY 502
106 #define HTTP_STATUS_UNAVAILABLE 503
109 /* Parse the HTTP status line, which is of format:
111 HTTP-Version SP Status-Code SP Reason-Phrase
113 The function returns the status-code, or -1 if the status line is
114 malformed. The pointer to reason-phrase is returned in REASON_PHRASE_PTR. */
116 parse_http_status_line (const char *line, const char **reason_phrase_ptr)
118 /* (the variables must not be named `major' and `minor', because
119 that breaks compilation with SunOS4 cc.) */
120 int mjr, mnr, statcode;
123 *reason_phrase_ptr = NULL;
125 /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is
126 major version, and Y is minor version. */
127 if (strncmp (line, "HTTP/", 5) != 0)
131 /* Calculate major HTTP version. */
133 for (mjr = 0; ISDIGIT (*line); line++)
134 mjr = 10 * mjr + (*line - '0');
135 if (*line != '.' || p == line)
139 /* Calculate minor HTTP version. */
141 for (mnr = 0; ISDIGIT (*line); line++)
142 mnr = 10 * mnr + (*line - '0');
143 if (*line != ' ' || p == line)
145 /* Wget will accept only 1.0 and higher HTTP-versions. The value of
146 minor version can be safely ignored. */
151 /* Calculate status code. */
152 if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2])))
154 statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0');
156 /* Set up the reason phrase pointer. */
158 /* RFC2068 requires SPC here, but we allow the string to finish
159 here, in case no reason-phrase is present. */
163 *reason_phrase_ptr = line;
168 *reason_phrase_ptr = line + 1;
/* WMIN: evaluates to the smaller of X and Y.  Each argument is
   expanded twice, so avoid passing expressions with side effects. */
173 #define WMIN(x, y) ((x) > (y) ? (y) : (x))
175 /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
176 PROMISED_SIZE bytes are sent over the wire -- if the file is
177 longer, read only that much; if the file is shorter, pad it with
181 post_file (int sock, void *ssl, const char *file_name, long promised_size)
183 static char chunk[8192];
188 /* Only one of SOCK and SSL may be active at the same time. */
189 assert (sock > -1 || ssl != NULL);
190 assert (sock == -1 || ssl == NULL);
192 DEBUGP (("[writing POST file %s ... ", file_name));
194 fp = fopen (file_name, "rb");
197 while (written < promised_size)
200 int length = fread (chunk, 1, sizeof (chunk), fp);
203 towrite = WMIN (promised_size - written, length);
206 write_error = ssl_iwrite (ssl, chunk, towrite);
209 write_error = iwrite (sock, chunk, towrite);
219 if (written < promised_size)
221 DEBUGP (("padding ... "));
222 /* This highly unlikely case can happen only if the file has
223 shrunk while we weren't looking. To uphold the promise, pad
224 the remaining data with zeros. #### Should we abort
226 memset (chunk, '\0', sizeof (chunk));
227 while (written < promised_size)
229 long towrite = WMIN (promised_size - written, sizeof (chunk));
232 write_error = ssl_iwrite (ssl, chunk, towrite);
235 write_error = iwrite (sock, chunk, towrite);
241 assert (written == promised_size);
242 DEBUGP (("done]\n"));
246 /* Functions to be used as arguments to header_process(): */
248 struct http_process_range_closure {
254 /* Parse the `Content-Range' header and extract the information it
255 contains. Returns 1 if successful, -1 otherwise. */
257 http_process_range (const char *hdr, void *arg)
259 struct http_process_range_closure *closure
260 = (struct http_process_range_closure *)arg;
263 /* Certain versions of Nutscape proxy server send out
264 `Content-Range' without "bytes" specifier, which is a breach of
265 RFC2068 (as well as the HTTP/1.1 draft which was current at the
266 time). But hell, I must support it... */
267 if (!strncasecmp (hdr, "bytes", 5))
270 /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
274 hdr += skip_lws (hdr);
280 for (num = 0; ISDIGIT (*hdr); hdr++)
281 num = 10 * num + (*hdr - '0');
282 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
284 closure->first_byte_pos = num;
286 for (num = 0; ISDIGIT (*hdr); hdr++)
287 num = 10 * num + (*hdr - '0');
288 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
290 closure->last_byte_pos = num;
292 for (num = 0; ISDIGIT (*hdr); hdr++)
293 num = 10 * num + (*hdr - '0');
294 closure->entity_length = num;
298 /* Place 1 to ARG if the HDR contains the word "none", 0 otherwise.
299 Used for `Accept-Ranges'. */
301 http_process_none (const char *hdr, void *arg)
303 int *where = (int *)arg;
305 if (strstr (hdr, "none"))
312 /* Place a malloc-ed copy of HDR, up to the first `;', in ARG. */
314 http_process_type (const char *hdr, void *arg)
316 char **result = (char **)arg;
317 /* Locate P on `;' or the terminating zero, whichever comes first. */
318 const char *p = strchr (hdr, ';');
320 p = hdr + strlen (hdr);
321 while (p > hdr && ISSPACE (*(p - 1)))
323 *result = strdupdelim (hdr, p);
327 /* Check whether the `Connection' header is set to "keep-alive". */
329 http_process_connection (const char *hdr, void *arg)
331 int *flag = (int *)arg;
332 if (!strcasecmp (hdr, "Keep-Alive"))
337 /* Persistent connections. Currently, we cache the most recently used
338 connection as persistent, provided that the HTTP server agrees to
339 make it such. The persistence data is stored in the variables
340 below. Ideally, it would be in a structure, and it should be
341 possible to cache an arbitrary fixed number of these connections.
343 I think the code is quite easy to extend in that direction. */
345 /* Whether a persistent connection is active. */
346 static int pc_active_p;
347 /* Host and port of currently active persistent connection. */
348 static struct address_list *pc_last_host_ip;
349 static unsigned short pc_last_port;
351 /* File descriptor of the currently active persistent connection. */
352 static int pc_last_fd;
355 /* Whether an SSL handshake has occurred on this connection */
356 static int pc_active_ssl;
357 /* SSL connection of the currently active persistent connection. */
358 static SSL *pc_last_ssl;
359 #endif /* HAVE_SSL */
361 /* Mark the persistent connection as invalid. This is used by the
362 CLOSE_* macros after they forcefully close a registered persistent
363 connection. This does not close the file descriptor -- it is left
364 to the caller to do that. (Maybe it should, though.) */
367 invalidate_persistent (void)
372 #endif /* HAVE_SSL */
373 if (pc_last_host_ip != NULL)
375 address_list_release (pc_last_host_ip);
376 pc_last_host_ip = NULL;
378 DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
381 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
382 persistent. This will enable someone to use the same connection
383 later. In the context of HTTP, this must be called only AFTER the
384 response has been received and the server has promised that the
385 connection will remain alive.
387 If a previous connection was persistent, it is closed. */
391 register_persistent (const char *host, unsigned short port, int fd, SSL *ssl)
395 register_persistent (const char *host, unsigned short port, int fd)
400 if (pc_last_fd == fd)
402 /* The connection FD is already registered. Nothing to
408 /* The old persistent connection is still active; let's
409 close it first. This situation arises whenever a
410 persistent connection exists, but we then connect to a
411 different host, and try to register a persistent
412 connection to that one. */
414 /* The ssl disconnect has to take place before the closing
417 shutdown_ssl(pc_last_ssl);
420 invalidate_persistent ();
424 assert (pc_last_host_ip == NULL);
426 /* This lookup_host cannot fail, because it has the results in the
428 pc_last_host_ip = lookup_host (host, 1);
429 assert (pc_last_host_ip != NULL);
436 pc_active_ssl = ssl ? 1 : 0;
438 DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
442 # define SHUTDOWN_SSL(ssl) do { \
444 shutdown_ssl (ssl); \
447 # define SHUTDOWN_SSL(ssl)
450 /* Return non-zero if a persistent connection is available for
451 connecting to HOST:PORT. */
455 persistent_available_p (const char *host, unsigned short port, int ssl)
459 persistent_available_p (const char *host, unsigned short port)
463 struct address_list *this_host_ip;
465 /* First, check whether a persistent connection is active at all. */
468 /* Second, check if the active connection pertains to the correct
469 (HOST, PORT) ordered pair. */
470 if (port != pc_last_port)
474 /* Second, a): check if current connection is (not) ssl, too. This
475 test is unlikely to fail because HTTP and HTTPS typically use
476 different ports. Yet it is possible, or so I [Christian
477 Fraenkel] have been told, to run HTTPS and HTTP simultaneously on
479 if (ssl != pc_active_ssl)
481 #endif /* HAVE_SSL */
483 this_host_ip = lookup_host (host, 1);
487 /* To equate the two host names for the purposes of persistent
488 connections, they need to share all the IP addresses in the
490 success = address_list_match_all (pc_last_host_ip, this_host_ip);
491 address_list_release (this_host_ip);
495 /* Third: check whether the connection is still open. This is
496 important because most servers implement a liberal (short) timeout
497 on persistent connections. Wget can of course always reconnect
498 if the connection doesn't work out, but it's nicer to know in
499 advance. This test is a logical followup of the first test, but
500 is "expensive" and therefore placed at the end of the list. */
501 if (!test_socket_open (pc_last_fd))
503 /* Oops, the socket is no longer open. Now that we know that,
504 let's invalidate the persistent connection before returning
508 SHUTDOWN_SSL (pc_last_ssl);
511 invalidate_persistent ();
517 /* The idea behind these two CLOSE macros is to distinguish between
518 two cases: one when the job we've been doing is finished, and we
519 want to close the connection and leave, and two when something is
520 seriously wrong and we're closing the connection as part of
523 In case of keep_alive, CLOSE_FINISH should leave the connection
524 open, while CLOSE_INVALIDATE should still close it.
526 Note that the semantics of the flag `keep_alive' is "this
527 connection *will* be reused (the server has promised not to close
528 the connection once we're done)", while the semantics of
529 `pc_active_p && (fd) == pc_last_fd' is "we're *now* using an
530 active, registered connection". */
532 #define CLOSE_FINISH(fd) do { \
535 SHUTDOWN_SSL (ssl); \
537 if (pc_active_p && (fd) == pc_last_fd) \
538 invalidate_persistent (); \
542 #define CLOSE_INVALIDATE(fd) do { \
543 SHUTDOWN_SSL (ssl); \
545 if (pc_active_p && (fd) == pc_last_fd) \
546 invalidate_persistent (); \
551 long len; /* received length */
552 long contlen; /* expected length */
553 long restval; /* the restart value */
554 int res; /* the result of last read */
555 char *newloc; /* new location (redirection) */
556 char *remote_time; /* remote time-stamp string */
557 char *error; /* textual HTTP error */
558 int statcode; /* status code */
559 long dltime; /* time of the download */
560 int no_truncate; /* whether truncating the file is
562 const char *referer; /* value of the referer header. */
563 char **local_file; /* local file. */
567 free_hstat (struct http_stat *hs)
569 FREE_MAYBE (hs->newloc);
570 FREE_MAYBE (hs->remote_time);
571 FREE_MAYBE (hs->error);
573 /* Guard against being called twice. */
575 hs->remote_time = NULL;
579 static char *create_authorization_line PARAMS ((const char *, const char *,
580 const char *, const char *,
582 static char *basic_authentication_encode PARAMS ((const char *, const char *,
584 static int known_authentication_scheme_p PARAMS ((const char *));
586 time_t http_atotm PARAMS ((char *));
/* BEGINS_WITH: non-zero when LINE starts with STRING_CONSTANT, compared
   case-insensitively, and the match is immediately followed by
   whitespace or by the end of LINE.  STRING_CONSTANT has its length
   taken with sizeof, so it must be a string literal (or char array),
   not a pointer. */
588 #define BEGINS_WITH(line, string_constant) \
589 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
590 && (ISSPACE (line[sizeof (string_constant) - 1]) \
591 || !line[sizeof (string_constant) - 1]))
593 /* Retrieve a document through HTTP protocol. It recognizes status
594 code, and correctly handles redirections. It closes the network
595 socket. If it receives an error from the functions below it, it
596 will print it if there is enough information to do so (almost
597 always), returning the error to the caller (i.e. http_loop).
599 Various HTTP parameters are stored to hs. Although it parses the
600 response code correctly, it is not used in a sane way. The caller
603 If PROXY is non-NULL, the connection will be made to the proxy
604 server, and u->url will be requested. */
606 gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
608 char *request, *type, *command, *full_path;
610 char *pragma_h, *referer, *useragent, *range, *wwwauth;
611 char *authenticate_h;
615 char *request_keep_alive;
616 int sock, hcount, all_length, statcode;
618 long contlen, contrange;
621 int auth_tried_already;
624 static SSL_CTX *ssl_ctx = NULL;
627 char *cookies = NULL;
629 /* Whether this connection will be kept alive after the HTTP request
633 /* Flags that detect the two ways of specifying HTTP keep-alive
635 int http_keep_alive_1, http_keep_alive_2;
637 /* Whether keep-alive should be inhibited. */
638 int inhibit_keep_alive;
640 /* Whether we need to print the host header with square brackets around host,
641 e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual
642 "Host: symbolic-name:1234". */
643 int squares_around_host = 0;
645 /* Headers sent when using POST. */
646 char *post_content_type, *post_content_length;
650 /* initialize ssl_ctx on first run */
653 uerr_t err = init_ssl (&ssl_ctx);
658 case SSLERRCTXCREATE:
660 logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
664 /* try without certfile */
665 logprintf (LOG_NOTQUIET,
666 _("Failed to load certificates from %s\n"),
669 logprintf (LOG_NOTQUIET,
670 _("Trying without the specified certificate\n"));
673 logprintf (LOG_NOTQUIET,
674 _("Failed to get certificate key from %s\n"),
677 logprintf (LOG_NOTQUIET,
678 _("Trying without the specified certificate\n"));
685 #endif /* HAVE_SSL */
687 if (!(*dt & HEAD_ONLY))
688 /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
689 know the local filename so we can save to it. */
690 assert (*hs->local_file != NULL);
693 auth_tried_already = 0;
695 inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
698 /* We need to come back here when the initial attempt to retrieve
699 without authorization header fails. (Expected to happen at least
700 for the Digest authorization scheme.) */
703 http_keep_alive_1 = http_keep_alive_2 = 0;
705 post_content_type = NULL;
706 post_content_length = NULL;
708 /* Initialize certain elements of struct http_stat. */
713 hs->remote_time = NULL;
716 /* If we're using a proxy, we will be connecting to the proxy
718 conn = proxy ? proxy : u;
720 /* First: establish the connection. */
721 if (inhibit_keep_alive
724 !persistent_available_p (conn->host, conn->port)
726 !persistent_available_p (conn->host, conn->port,
727 u->scheme == SCHEME_HTTPS)
728 #endif /* HAVE_SSL */
731 struct address_list *al = lookup_host (conn->host, 0);
734 set_connection_host_name (conn->host);
735 sock = connect_to_many (al, conn->port, 0);
736 set_connection_host_name (NULL);
737 address_list_release (al);
740 return errno == ECONNREFUSED ? CONREFUSED : CONERROR;
743 if (conn->scheme == SCHEME_HTTPS)
744 if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
746 logputs (LOG_VERBOSE, "\n");
747 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
752 #endif /* HAVE_SSL */
756 logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
757 conn->host, conn->port);
758 /* #### pc_last_fd should be accessed through an accessor
763 #endif /* HAVE_SSL */
764 DEBUGP (("Reusing fd %d.\n", sock));
769 else if (opt.post_file_name || opt.post_data)
777 referer = (char *)alloca (9 + strlen (hs->referer) + 3);
778 sprintf (referer, "Referer: %s\r\n", hs->referer);
781 if (*dt & SEND_NOCACHE)
782 pragma_h = "Pragma: no-cache\r\n";
788 range = (char *)alloca (13 + numdigit (hs->restval) + 4);
789 /* Gag me! Some servers (e.g. WebSitePro) have been known to
790 respond to the following `Range' format by generating a
791 multipart/x-byte-ranges MIME document! This MIME type was
792 present in an old draft of the byteranges specification.
793 HTTP/1.1 specifies a multipart/byte-ranges MIME type, but
794 only if multiple non-overlapping ranges are requested --
795 which Wget never does. */
796 sprintf (range, "Range: bytes=%ld-\r\n", hs->restval);
801 STRDUP_ALLOCA (useragent, opt.useragent);
804 useragent = (char *)alloca (10 + strlen (version_string));
805 sprintf (useragent, "Wget/%s", version_string);
807 /* Construct the authentication, if userid is present. */
810 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
811 user = user ? user : opt.http_user;
812 passwd = passwd ? passwd : opt.http_passwd;
819 /* We have the username and the password, but haven't tried
820 any authorization yet. Let's see if the "Basic" method
821 works. If not, we'll come back here and construct a
822 proper authorization method with the right challenges.
824 If we didn't employ this kind of logic, every URL that
825 requires authorization would have to be processed twice,
826 which is very suboptimal and generates a bunch of false
827 "unauthorized" errors in the server log.
829 #### But this logic also has a serious problem when used
830 with stronger authentications: we *first* transmit the
831 username and the password in clear text, and *then*
832 attempt a stronger authentication scheme. That cannot be
833 right! We are only fortunate that almost everyone still
834 uses the `Basic' scheme anyway.
836 There should be an option to prevent this from happening,
837 for those who use strong authentication schemes and value
839 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
843 /* Use the full path, i.e. one that includes the leading
844 slash and the query string, but is independent of proxy
846 char *pth = url_full_path (u);
847 wwwauth = create_authorization_line (authenticate_h, user, passwd,
856 char *proxy_user, *proxy_passwd;
857 /* For normal username and password, URL components override
858 command-line/wgetrc parameters. With proxy authentication,
859 it's the reverse, because proxy URLs are normally the
860 "permanent" ones, so command-line args should take
862 if (opt.proxy_user && opt.proxy_passwd)
864 proxy_user = opt.proxy_user;
865 proxy_passwd = opt.proxy_passwd;
869 proxy_user = proxy->user;
870 proxy_passwd = proxy->passwd;
872 /* #### This does not appear right. Can't the proxy request,
873 say, `Digest' authentication? */
874 if (proxy_user && proxy_passwd)
875 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
876 "Proxy-Authorization");
879 /* String of the form :PORT. Used only for non-standard ports. */
881 if (u->port != scheme_default_port (u->scheme))
883 port_maybe = (char *)alloca (numdigit (u->port) + 2);
884 sprintf (port_maybe, ":%d", u->port);
887 if (!inhibit_keep_alive)
888 request_keep_alive = "Connection: Keep-Alive\r\n";
890 request_keep_alive = NULL;
893 cookies = build_cookies_request (u->host, u->port, u->path,
895 u->scheme == SCHEME_HTTPS
901 if (opt.post_data || opt.post_file_name)
903 post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n";
905 post_data_size = strlen (opt.post_data);
908 post_data_size = file_size (opt.post_file_name);
909 if (post_data_size == -1)
911 logprintf (LOG_NOTQUIET, "POST data file missing: %s\n",
916 post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1);
917 sprintf (post_content_length,
918 "Content-Length: %ld\r\n", post_data_size);
922 full_path = xstrdup (u->url);
924 /* Use the full path, i.e. one that includes the leading slash and
925 the query string. E.g. if u->path is "foo/bar" and u->query is
926 "param=value", full_path will be "/foo/bar?param=value". */
927 full_path = url_full_path (u);
929 if (strchr (u->host, ':'))
930 squares_around_host = 1;
932 /* Allocate the memory for the request. */
933 request = (char *)alloca (strlen (command)
937 + (port_maybe ? strlen (port_maybe) : 0)
938 + strlen (HTTP_ACCEPT)
939 + (request_keep_alive
940 ? strlen (request_keep_alive) : 0)
941 + (referer ? strlen (referer) : 0)
942 + (cookies ? strlen (cookies) : 0)
943 + (wwwauth ? strlen (wwwauth) : 0)
944 + (proxyauth ? strlen (proxyauth) : 0)
945 + (range ? strlen (range) : 0)
948 ? strlen (post_content_type) : 0)
949 + (post_content_length
950 ? strlen (post_content_length) : 0)
951 + (opt.user_header ? strlen (opt.user_header) : 0)
953 /* Construct the request. */
959 %s%s%s%s%s%s%s%s%s%s\r\n",
962 squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "",
963 port_maybe ? port_maybe : "",
965 request_keep_alive ? request_keep_alive : "",
966 referer ? referer : "",
967 cookies ? cookies : "",
968 wwwauth ? wwwauth : "",
969 proxyauth ? proxyauth : "",
972 post_content_type ? post_content_type : "",
973 post_content_length ? post_content_length : "",
974 opt.user_header ? opt.user_header : "");
975 DEBUGP (("---request begin---\n%s", request));
977 /* Free the temporary memory. */
978 FREE_MAYBE (wwwauth);
979 FREE_MAYBE (proxyauth);
980 FREE_MAYBE (cookies);
983 /* Send the request to server. */
985 if (conn->scheme == SCHEME_HTTPS)
986 write_error = ssl_iwrite (ssl, request, strlen (request));
989 write_error = iwrite (sock, request, strlen (request));
991 if (write_error >= 0)
995 DEBUGP (("[POST data: %s]\n", opt.post_data));
997 if (conn->scheme == SCHEME_HTTPS)
998 write_error = ssl_iwrite (ssl, opt.post_data, post_data_size);
1001 write_error = iwrite (sock, opt.post_data, post_data_size);
1003 else if (opt.post_file_name)
1006 if (conn->scheme == SCHEME_HTTPS)
1007 write_error = post_file (-1, ssl, opt.post_file_name,
1011 write_error = post_file (sock, NULL, opt.post_file_name,
1015 DEBUGP (("---request end---\n"));
1017 if (write_error < 0)
1019 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
1021 CLOSE_INVALIDATE (sock);
1024 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1025 proxy ? "Proxy" : "HTTP");
1026 contlen = contrange = -1;
1031 /* Before reading anything, initialize the rbuf. */
1032 rbuf_initialize (&rbuf, sock);
1034 if (conn->scheme == SCHEME_HTTPS)
1038 #endif /* HAVE_SSL */
1041 /* Header-fetching loop. */
1049 /* Get the header. */
1050 status = header_get (&rbuf, &hdr,
1051 /* Disallow continuations for status line. */
1052 (hcount == 1 ? HG_NO_CONTINUATIONS : HG_NONE));
1054 /* Check for errors. */
1055 if (status == HG_EOF && *hdr)
1057 /* This used to be an unconditional error, but that was
1058 somewhat controversial, because of a large number of
1059 broken CGI's that happily "forget" to send the second EOL
1060 before closing the connection of a HEAD request.
1062 So, the deal is to check whether the header is empty
1063 (*hdr is zero if it is); if yes, it means that the
1064 previous header was fully retrieved, and that -- most
1065 probably -- the request is complete. "...be liberal in
1066 what you accept." Oh boy. */
1067 logputs (LOG_VERBOSE, "\n");
1068 logputs (LOG_NOTQUIET, _("End of file while parsing headers.\n"));
1071 FREE_MAYBE (all_headers);
1072 CLOSE_INVALIDATE (sock);
1075 else if (status == HG_ERROR)
1077 logputs (LOG_VERBOSE, "\n");
1078 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1082 FREE_MAYBE (all_headers);
1083 CLOSE_INVALIDATE (sock);
1087 /* If the headers are to be saved to a file later, save them to
1089 if (opt.save_headers)
1091 int lh = strlen (hdr);
1092 all_headers = (char *)xrealloc (all_headers, all_length + lh + 2);
1093 memcpy (all_headers + all_length, hdr, lh);
1095 all_headers[all_length++] = '\n';
1096 all_headers[all_length] = '\0';
1099 /* Check for status line. */
1103 /* Parse the first line of server response. */
1104 statcode = parse_http_status_line (hdr, &error);
1105 hs->statcode = statcode;
1106 /* Store the descriptive response. */
1107 if (statcode == -1) /* malformed response */
1109 /* A common reason for "malformed response" error is the
1110 case when no data was actually received. Handle this
1113 hs->error = xstrdup (_("No data received"));
1115 hs->error = xstrdup (_("Malformed status line"));
1120 hs->error = xstrdup (_("(no description)"));
1122 hs->error = xstrdup (error);
1124 if ((statcode != -1)
1130 if (opt.server_response)
1131 logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
1133 logprintf (LOG_VERBOSE, "%2d %s", statcode, error);
1139 /* Exit on empty header. */
1146 /* Print the header if requested. */
1147 if (opt.server_response && hcount != 1)
1148 logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
1150 /* Try getting content-length. */
1151 if (contlen == -1 && !opt.ignore_length)
1152 if (header_process (hdr, "Content-Length", header_extract_number,
1155 /* Try getting content-type. */
1157 if (header_process (hdr, "Content-Type", http_process_type, &type))
1159 /* Try getting location. */
1161 if (header_process (hdr, "Location", header_strdup, &hs->newloc))
1163 /* Try getting last-modified. */
1164 if (!hs->remote_time)
1165 if (header_process (hdr, "Last-Modified", header_strdup,
1168 /* Try getting cookies. */
1170 if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u))
1172 /* Try getting www-authentication. */
1173 if (!authenticate_h)
1174 if (header_process (hdr, "WWW-Authenticate", header_strdup,
1177 /* Check for accept-ranges header. If it contains the word
1178 `none', disable the ranges. */
1179 if (*dt & ACCEPTRANGES)
1182 if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep))
1185 *dt &= ~ACCEPTRANGES;
1189 /* Try getting content-range. */
1190 if (contrange == -1)
1192 struct http_process_range_closure closure;
1193 if (header_process (hdr, "Content-Range", http_process_range, &closure))
1195 contrange = closure.first_byte_pos;
1199 /* Check for keep-alive related responses. */
1200 if (!inhibit_keep_alive)
1202 /* Check for the `Keep-Alive' header. */
1203 if (!http_keep_alive_1)
1205 if (header_process (hdr, "Keep-Alive", header_exists,
1206 &http_keep_alive_1))
1209 /* Check for `Connection: Keep-Alive'. */
1210 if (!http_keep_alive_2)
1212 if (header_process (hdr, "Connection", http_process_connection,
1213 &http_keep_alive_2))
1221 logputs (LOG_VERBOSE, "\n");
1224 && (http_keep_alive_1 || http_keep_alive_2))
1226 assert (inhibit_keep_alive == 0);
1230 /* The server has promised that it will not close the connection
1231 when we're done. This means that we can register it. */
1233 register_persistent (conn->host, conn->port, sock);
1235 register_persistent (conn->host, conn->port, sock, ssl);
1236 #endif /* HAVE_SSL */
1238 if ((statcode == HTTP_STATUS_UNAUTHORIZED)
1241 /* Authorization is required. */
1245 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1246 might be more bytes in the body. */
1247 if (auth_tried_already)
1249 /* If we have tried it already, then there is no point
1252 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1253 xfree (authenticate_h);
1256 else if (!known_authentication_scheme_p (authenticate_h))
1258 xfree (authenticate_h);
1259 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1262 else if (BEGINS_WITH (authenticate_h, "Basic"))
1264 /* The authentication scheme is basic, the one we try by
1265 default, and it failed. There's no sense in trying
1271 auth_tried_already = 1;
1275 /* We do not need this anymore. */
1278 xfree (authenticate_h);
1279 authenticate_h = NULL;
1282 /* 20x responses are counted among successful by default. */
1283 if (H_20X (statcode))
1286 /* Return if redirected. */
1287 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1289 /* RFC2068 says that in case of the 300 (multiple choices)
1290 response, the server can output a preferred URL through
1291 `Location' header; otherwise, the request should be treated
1292 like GET. So, if the location is set, it will be a
1293 redirection; otherwise, just proceed normally. */
1294 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1298 logprintf (LOG_VERBOSE,
1299 _("Location: %s%s\n"),
1300 hs->newloc ? hs->newloc : _("unspecified"),
1301 hs->newloc ? _(" [following]") : "");
1302 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1303 might be more bytes in the body. */
1305 FREE_MAYBE (all_headers);
1310 if (type && !strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)))
1313 /* We don't assume text/html by default. */
1316 if (opt.html_extension && (*dt & TEXTHTML))
1317 /* -E / --html-extension / html_extension = on was specified, and this is a
1318 text/html file. If some case-insensitive variation on ".htm[l]" isn't
1319 already the file's suffix, tack on ".html". */
1321 char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
1323 if (last_period_in_local_filename == NULL ||
1324 !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
1325 strcasecmp(last_period_in_local_filename, ".html") == EQ))
1327 size_t local_filename_len = strlen(*hs->local_file);
1329 *hs->local_file = xrealloc(*hs->local_file,
1330 local_filename_len + sizeof(".html"));
1331 strcpy(*hs->local_file + local_filename_len, ".html");
1333 *dt |= ADDED_HTML_EXTENSION;
1337 if (contrange == -1)
1339 /* We did not get a content-range header. This means that the
1340 server did not honor our `Range' request. Normally, this
1341 means we should reset hs->restval and continue normally. */
1343 /* However, if `-c' is used, we need to be a bit more careful:
1345 1. If `-c' is specified and the file already existed when
1346 Wget was started, it would be a bad idea for us to start
1347 downloading it from scratch, effectively truncating it. I
1348 believe this cannot happen unless `-c' was specified.
1350 2. If `-c' is used on a file that is already fully
1351 downloaded, we're requesting bytes after the end of file,
1352 which can result in server not honoring `Range'. If this is
1353 the case, `Content-Length' will be equal to the length of the
1355 if (opt.always_rest)
1357 /* Check for condition #2. */
1358 if (hs->restval > 0 /* restart was requested. */
1359 && contlen != -1 /* we got content-length. */
1360 && hs->restval >= contlen /* file fully downloaded
1364 logputs (LOG_VERBOSE, _("\
1365 \n The file is already fully retrieved; nothing to do.\n\n"));
1366 /* In case the caller inspects. */
1369 /* Mark as successfully retrieved. */
1372 FREE_MAYBE (all_headers);
1373 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1374 might be more bytes in the body. */
1375 return RETRUNNEEDED;
1378 /* Check for condition #1. */
1379 if (hs->no_truncate)
1381 logprintf (LOG_NOTQUIET,
1384 Continued download failed on this file, which conflicts with `-c'.\n\
1385 Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
1387 FREE_MAYBE (all_headers);
1388 CLOSE_INVALIDATE (sock);
1389 return CONTNOTSUPPORTED;
1397 else if (contrange != hs->restval ||
1398 (H_PARTIAL (statcode) && contrange == -1))
1400 /* This means the whole request was somehow misunderstood by the
1401 server. Bail out. */
1403 FREE_MAYBE (all_headers);
1404 CLOSE_INVALIDATE (sock);
1411 contlen += contrange;
1413 contrange = -1; /* If conent-length was not sent,
1414 content-range will be ignored. */
1416 hs->contlen = contlen;
1420 if ((*dt & RETROKF) && !opt.server_response)
1422 /* No need to print this output if the body won't be
1423 downloaded at all, or if the original server response is
1425 logputs (LOG_VERBOSE, _("Length: "));
1428 logputs (LOG_VERBOSE, legible (contlen));
1429 if (contrange != -1)
1430 logprintf (LOG_VERBOSE, _(" (%s to go)"),
1431 legible (contlen - contrange));
1434 logputs (LOG_VERBOSE,
1435 opt.ignore_length ? _("ignored") : _("unspecified"));
1437 logprintf (LOG_VERBOSE, " [%s]\n", type);
1439 logputs (LOG_VERBOSE, "\n");
1443 type = NULL; /* We don't need it any more. */
1445 /* Return if we have no intention of further downloading. */
1446 if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1448 /* In case the caller cares to look... */
1452 FREE_MAYBE (all_headers);
1453 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1454 might be more bytes in the body. */
1455 return RETRFINISHED;
1458 /* Open the local file. */
1461 mkalldirs (*hs->local_file);
1463 rotate_backups (*hs->local_file);
1464 fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
1467 logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
1468 CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
1469 might be more bytes in the body. */
1470 FREE_MAYBE (all_headers);
1476 extern int global_download_count;
1478 /* To ensure that repeated "from scratch" downloads work for -O
1479 files, we rewind the file pointer, unless restval is
1480 non-zero. (This works only when -O is used on regular files,
1481 but it's still a valuable feature.)
1483 However, this loses when more than one URL is specified on
1484 the command line the second rewinds eradicates the contents
1485 of the first download. Thus we disable the above trick for
1486 all the downloads except the very first one.
1488 #### A possible solution to this would be to remember the
1489 file position in the output document and to seek to that
1490 position, instead of rewinding. */
1491 if (!hs->restval && global_download_count == 0)
1493 /* This will silently fail for streams that don't correspond
1494 to regular files, but that's OK. */
1496 /* ftruncate is needed because opt.dfp is opened in append
1497 mode if opt.always_rest is set. */
1498 ftruncate (fileno (fp), 0);
1503 /* #### This confuses the code that checks for file size. There
1504 should be some overhead information. */
1505 if (opt.save_headers)
1506 fwrite (all_headers, 1, all_length, fp);
1508 /* Get the contents of the document. */
1509 hs->res = get_contents (sock, fp, &hs->len, hs->restval,
1510 (contlen != -1 ? contlen : 0),
1511 &rbuf, keep_alive, &hs->dltime);
1514 CLOSE_FINISH (sock);
1516 CLOSE_INVALIDATE (sock);
1519 /* Close or flush the file. We have to be careful to check for
1520 error here. Checking the result of fwrite() is not enough --
1521 errors could go unnoticed! */
1524 flush_res = fclose (fp);
1526 flush_res = fflush (fp);
1527 if (flush_res == EOF)
1530 FREE_MAYBE (all_headers);
1533 return RETRFINISHED;
1536 /* The genuine HTTP loop!  This is the part where the retrieval is
   retried, and retried, and retried, and...

   Outline, as far as this copy of the file shows:

   - Cookies named by opt.cookies_input are loaded once; the
     cookies_loaded_p flag prevents a second load.
   - The local file name is *LOCAL_FILE when the caller supplied one,
     otherwise the result of url_filename (U).  LOCF is that name, or
     opt.output_document when an output document was requested.
   - With opt.noclobber, an already existing local file is not
     retrieved again.
   - With opt.timestamping, the local file (or its ".orig" sibling when
     opt.backup_converted is set) is stat()ed so its size can later be
     compared with hstat.contlen.
   - gethttp () is then called inside a loop whose closing `while'
     repeats as long as opt.ntry is zero or COUNT is below opt.ntry.
     LOCF is recomputed after each call because gethttp () may have
     reallocated *hstat.local_file.
   - On a completed download, downloaded_increase () and
     downloaded_file () are called with the number of bytes and LOCF.

   NOTE(review): short lines (braces, `return' statements, `case'
   labels, the return type) appear to be missing from this listing, so
   the exact return codes cannot be confirmed here -- check them
   against a pristine copy before relying on this description.  */
1539 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
1540 int *dt, struct url *proxy)
1543 int use_ts, got_head = 0; /* time-stamping info */
1544 char *filename_plus_orig_suffix;
1545 char *local_filename = NULL;
1546 char *tms, *locf, *tmrate;
1548 time_t tml = -1, tmr = -1; /* local and remote time-stamps */
1549 long local_size = 0; /* the size of the local file */
1550 size_t filename_len;
1551 struct http_stat hstat; /* HTTP status */
1555 /* This used to be done in main(), but it's a better idea to do it
1556 here so that we don't go through the hoops if we're just using
1558 if (opt.cookies && opt.cookies_input && !cookies_loaded_p)
1560 load_cookies (opt.cookies_input);
1561 cookies_loaded_p = 1;
1566 /* Warn on (likely bogus) wildcard usage in HTTP. Don't use
1567 has_wildcards_p because it would also warn on `?', and we know that
1568 shows up in CGI paths a *lot*. */
1569 if (strchr (u->url, '*'))
1570 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1572 /* Determine the local filename. */
1573 if (local_file && *local_file)
1574 hstat.local_file = local_file;
1575 else if (local_file)
1577 *local_file = url_filename (u);
1578 hstat.local_file = local_file;
1582 dummy = url_filename (u);
1583 hstat.local_file = &dummy;
1586 if (!opt.output_document)
1587 locf = *hstat.local_file;
1589 locf = opt.output_document;
1591 hstat.referer = referer;
1593 filename_len = strlen (*hstat.local_file);
1594 filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
1596 if (opt.noclobber && file_exists_p (*hstat.local_file))
1598 /* If opt.noclobber is turned on and file already exists, do not
1599 retrieve the file */
1600 logprintf (LOG_VERBOSE, _("\
1601 File `%s' already there, will not retrieve.\n"), *hstat.local_file);
1602 /* If the file is there, we suppose it's retrieved OK. */
1605 /* #### Bogusness alert. */
1606 /* If its suffix is "html" or "htm" or similar, assume text/html. */
1607 if (has_html_suffix_p (*hstat.local_file))
1615 if (opt.timestamping)
1617 boolean local_dot_orig_file_exists = FALSE;
1619 if (opt.backup_converted)
1620 /* If -K is specified, we'll act on the assumption that it was specified
1621 last time these files were downloaded as well, and instead of just
1622 comparing local file X against server file X, we'll compare local
1623 file X.orig (if extant, else X) against server file X. If -K
1624 _wasn't_ specified last time, or the server contains files called
1625 *.orig, -N will be back to not operating correctly with -k. */
1627 /* Would a single s[n]printf() call be faster? --dan
1629 Definitely not. sprintf() is horribly slow. It's a
1630 different question whether the difference between the two
1631 affects a program. Usually I'd say "no", but at one
1632 point I profiled Wget, and found that a measurable and
1633 non-negligible amount of time was lost calling sprintf()
1634 in url.c. Replacing sprintf with inline calls to
1635 strcpy() and long_to_string() made a difference.
1637 memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
1638 memcpy (filename_plus_orig_suffix + filename_len,
1639 ".orig", sizeof (".orig"));
1641 /* Try to stat() the .orig file. */
1642 if (stat (filename_plus_orig_suffix, &st) == 0)
1644 local_dot_orig_file_exists = TRUE;
1645 local_filename = filename_plus_orig_suffix;
1649 if (!local_dot_orig_file_exists)
1650 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1651 if (stat (*hstat.local_file, &st) == 0)
1652 local_filename = *hstat.local_file;
1654 if (local_filename != NULL)
1655 /* There was a local file, so we'll check later to see if the version
1656 the server has is the same version we already have, allowing us to
1662 /* Modification time granularity is 2 seconds for Windows, so
1663 increase local time by 1 second for later comparison. */
1666 local_size = st.st_size;
1670 /* Reset the counter. */
1672 *dt = 0 | ACCEPTRANGES;
1676 /* Increment the pass counter. */
1678 sleep_between_retrievals (count);
1679 /* Get the current time string. */
1680 tms = time_str (NULL);
1681 /* Print fetch message, if opt.verbose. */
1684 char *hurl = url_string (u, 1);
1688 sprintf (tmp, _("(try:%2d)"), count);
1689 logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
1690 tms, hurl, tmp, locf);
1692 ws_changetitle (hurl, 1);
1697 /* Default document type is empty. However, if spider mode is
1698 on or time-stamping is employed, the HEAD_ONLY command is
1699 encoded within *dt. */
1700 if (opt.spider || (use_ts && !got_head))
1704 /* Assume no restarting. */
1706 /* Decide whether or not to restart. */
1707 if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
1708 /* #### this calls access() and then stat(); could be optimized. */
1709 && file_exists_p (locf))
1710 if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
1711 hstat.restval = st.st_size;
1713 /* If `-c' is used and the file is existing and non-empty,
1714 refuse to truncate it if the server doesn't support continued
1716 hstat.no_truncate = 0;
1717 if (opt.always_rest && hstat.restval)
1718 hstat.no_truncate = 1;
1720 /* Decide whether to send the no-cache directive. We send it in
1722 a) we're using a proxy, and we're past our first retrieval.
1723 Some proxies are notorious for caching incomplete data, so
1724 we require a fresh get.
1725 b) caching is explicitly inhibited. */
1726 if ((proxy && count > 1) /* a */
1727 || !opt.allow_cache /* b */
1729 *dt |= SEND_NOCACHE;
1731 *dt &= ~SEND_NOCACHE;
1733 /* Try fetching the document, or at least its head. */
1734 err = gethttp (u, &hstat, dt, proxy);
1736 /* It's unfortunate that wget determines the local filename before finding
1737 out the Content-Type of the file. Barring a major restructuring of the
1738 code, we need to re-set locf here, since gethttp() may have xrealloc()d
1739 *hstat.local_file to tack on ".html". */
1740 if (!opt.output_document)
1741 locf = *hstat.local_file;
1743 locf = opt.output_document;
1746 tms = time_str (NULL);
1747 /* Get the new location (with or without the redirection). */
1749 *newloc = xstrdup (hstat.newloc);
1752 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
1753 case CONERROR: case READERR: case WRITEFAILED:
1755 /* Non-fatal errors continue executing the loop, which will
1756 bring them to "while" statement at the end, to judge
1757 whether the number of tries was exceeded. */
1758 free_hstat (&hstat);
1759 printwhat (count, opt.ntry);
1762 case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
1763 case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
1764 /* Fatal errors just return from the function. */
1765 free_hstat (&hstat);
1769 case FWRITEERR: case FOPENERR:
1770 /* Another fatal error. */
1771 logputs (LOG_VERBOSE, "\n");
1772 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
1773 *hstat.local_file, strerror (errno));
1774 free_hstat (&hstat);
1779 /* Another fatal error. */
1780 logputs (LOG_VERBOSE, "\n");
1781 logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
1782 free_hstat (&hstat);
1787 /* Return the new location to the caller. */
1790 logprintf (LOG_NOTQUIET,
1791 _("ERROR: Redirection (%d) without location.\n"),
1793 free_hstat (&hstat);
1797 free_hstat (&hstat);
1802 /* The file was already fully retrieved. */
1803 free_hstat (&hstat);
1808 /* Deal with you later. */
1811 /* All possibilities should have been exhausted. */
1814 if (!(*dt & RETROKF))
1818 /* #### Ugly ugly ugly! */
1819 char *hurl = url_string (u, 1);
1820 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
1823 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
1824 tms, hstat.statcode, hstat.error);
1825 logputs (LOG_VERBOSE, "\n");
1826 free_hstat (&hstat);
1831 /* Did we get the time-stamp? */
1834 if (opt.timestamping && !hstat.remote_time)
1836 logputs (LOG_NOTQUIET, _("\
1837 Last-modified header missing -- time-stamps turned off.\n"));
1839 else if (hstat.remote_time)
1841 /* Convert the date-string into struct tm. */
1842 tmr = http_atotm (hstat.remote_time);
1843 if (tmr == (time_t) (-1))
1844 logputs (LOG_VERBOSE, _("\
1845 Last-modified header invalid -- time-stamp ignored.\n"));
1849 /* The time-stamping section. */
1854 use_ts = 0; /* no more time-stamping */
1855 count = 0; /* the retrieve count for HEAD is
1857 if (hstat.remote_time && tmr != (time_t) (-1))
1859 /* Now time-stamping can be used validly. Time-stamping
1860 means that if the sizes of the local and remote file
1861 match, and local file is newer than the remote file,
1862 it will not be retrieved. Otherwise, the normal
1863 download procedure is resumed. */
1865 (hstat.contlen == -1 || local_size == hstat.contlen))
1867 logprintf (LOG_VERBOSE, _("\
1868 Server file no newer than local file `%s' -- not retrieving.\n\n"),
1870 free_hstat (&hstat);
1874 else if (tml >= tmr)
1875 logprintf (LOG_VERBOSE, _("\
1876 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
1878 logputs (LOG_VERBOSE,
1879 _("Remote file is newer, retrieving.\n"));
1881 free_hstat (&hstat);
1884 if ((tmr != (time_t) (-1))
1886 && ((hstat.len == hstat.contlen) ||
1887 ((hstat.res == 0) &&
1888 ((hstat.contlen == -1) ||
1889 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
1891 /* #### This code repeats in http.c and ftp.c. Move it to a
1893 const char *fl = NULL;
1894 if (opt.output_document)
1896 if (opt.od_known_regular)
1897 fl = opt.output_document;
1900 fl = *hstat.local_file;
1904 /* End of time-stamping section. */
1908 logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
1913 tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
1915 if (hstat.len == hstat.contlen)
1919 logprintf (LOG_VERBOSE,
1920 _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
1921 tms, tmrate, locf, hstat.len, hstat.contlen);
1922 logprintf (LOG_NONVERBOSE,
1923 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
1924 tms, u->url, hstat.len, hstat.contlen, locf, count);
1927 downloaded_increase (hstat.len);
1929 /* Remember that we downloaded the file for later ".orig" code. */
1930 if (*dt & ADDED_HTML_EXTENSION)
1931 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1933 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1935 free_hstat (&hstat);
1939 else if (hstat.res == 0) /* No read error */
1941 if (hstat.contlen == -1) /* We don't know how much we were supposed
1942 to get, so assume we succeeded. */
1946 logprintf (LOG_VERBOSE,
1947 _("%s (%s) - `%s' saved [%ld]\n\n"),
1948 tms, tmrate, locf, hstat.len);
1949 logprintf (LOG_NONVERBOSE,
1950 "%s URL:%s [%ld] -> \"%s\" [%d]\n",
1951 tms, u->url, hstat.len, locf, count);
1954 downloaded_increase (hstat.len);
1956 /* Remember that we downloaded the file for later ".orig" code. */
1957 if (*dt & ADDED_HTML_EXTENSION)
1958 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1960 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1962 free_hstat (&hstat);
1966 else if (hstat.len < hstat.contlen) /* meaning we lost the
1967 connection too soon */
1969 logprintf (LOG_VERBOSE,
1970 _("%s (%s) - Connection closed at byte %ld. "),
1971 tms, tmrate, hstat.len);
1972 printwhat (count, opt.ntry);
1973 free_hstat (&hstat);
1976 else if (!opt.kill_longer) /* meaning we got more than expected */
1978 logprintf (LOG_VERBOSE,
1979 _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
1980 tms, tmrate, locf, hstat.len, hstat.contlen);
1981 logprintf (LOG_NONVERBOSE,
1982 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
1983 tms, u->url, hstat.len, hstat.contlen, locf, count);
1985 downloaded_increase (hstat.len);
1987 /* Remember that we downloaded the file for later ".orig" code. */
1988 if (*dt & ADDED_HTML_EXTENSION)
1989 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1991 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1993 free_hstat (&hstat);
1997 else /* the same, but not accepted */
1999 logprintf (LOG_VERBOSE,
2000 _("%s (%s) - Connection closed at byte %ld/%ld. "),
2001 tms, tmrate, hstat.len, hstat.contlen);
2002 printwhat (count, opt.ntry);
2003 free_hstat (&hstat);
2007 else /* now hstat.res can only be -1 */
2009 if (hstat.contlen == -1)
2011 logprintf (LOG_VERBOSE,
2012 _("%s (%s) - Read error at byte %ld (%s)."),
2013 tms, tmrate, hstat.len, strerror (errno));
2014 printwhat (count, opt.ntry);
2015 free_hstat (&hstat);
2018 else /* hstat.res == -1 and contlen is given */
2020 logprintf (LOG_VERBOSE,
2021 _("%s (%s) - Read error at byte %ld/%ld (%s). "),
2022 tms, tmrate, hstat.len, hstat.contlen,
2024 printwhat (count, opt.ntry);
2025 free_hstat (&hstat);
2032 while (!opt.ntry || (count < opt.ntry));
/* Converts struct tm to time_t, assuming the data in tm is UTC rather
   than local timezone.

   mktime is similar but assumes struct tm, also known as the
   "broken-down" form of time, is in local time zone.  mktime_from_utc
   uses mktime to make the conversion understanding that an offset
   will be introduced by the local time assumption.

   mktime_from_utc then measures the introduced offset by applying
   gmtime to the initial result and applying mktime to the resulting
   "broken-down" form.  The difference between the two mktime results
   is the measured offset which is then subtracted from the initial
   mktime result to yield a calendar time which is the value returned.

   tm_isdst in struct tm is set to 0 to force mktime to introduce a
   consistent offset (the non DST offset) since tm and tm+o might be
   on opposite sides of a DST change.

   Some implementations of mktime return -1 for the nonexistent
   localtime hour at the beginning of DST.  In this event, use
   mktime(tm - 1hr) + 3600.

   Schematically
     mktime(tm)   --> t+o
     gmtime(t+o)  --> tm+o
     mktime(tm+o) --> t+2o
     t+o - (t+2o - t+o) = t

   T is modified in place: mktime normalizes its fields, and tm_hour is
   decremented when the first conversion has to be retried.

   Returns the calendar time, or -1 if either conversion fails.

   Note that glibc contains a function of the same purpose named
   `timegm' (reverse of gmtime).  But obviously, it is not universally
   available, and unfortunately it is not straightforwardly
   extractable for use here.  Perhaps configure should detect timegm
   and use it where available.

   Contributed by Roger Beeman <beeman@cisco.com>, with the help of
   Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO.
   Further improved by Roger with assistance from Edward J. Sabol
   based on input by Jamie Zawinski.  */

time_t
mktime_from_utc (struct tm *t)
{
  time_t tl, tb;
  struct tm *tg;

  /* First pass: T interpreted as local time, giving t+o.  */
  tl = mktime (t);
  if (tl == (time_t) -1)
    {
      /* Possibly the nonexistent hour at the start of DST; retry one
         hour earlier and add the hour back.  */
      t->tm_hour--;
      tl = mktime (t);
      if (tl == (time_t) -1)
        return -1; /* can't deal with output from strptime */
      tl += 3600;
    }

  /* Second pass: measure the offset O that mktime introduced.  */
  tg = gmtime (&tl);
  if (tg == NULL)
    return -1; /* gmtime could not represent the value */
  tg->tm_isdst = 0;
  tb = mktime (tg);
  if (tb == (time_t) -1)
    {
      tg->tm_hour--;
      tb = mktime (tg);
      if (tb == (time_t) -1)
        return -1; /* can't deal with output from gmtime */
      tb += 3600;
    }

  /* (t+o) - ((t+2o) - (t+o)) = t  */
  return (tl - (tb - tl));
}
/* Check whether the result of strptime() indicates success.
   strptime() returns the pointer to how far it got to in the string.
   The processing has been successful if the string is at `GMT' or
   `+X', or at the end of the string.

   In extended regexp parlance, the function returns 1 if P matches
   "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (which strptime
   can return) is considered a failure and 0 is returned.  */

static int
check_end (const char *p)
{
  /* strptime() signals a parse failure with NULL.  */
  if (!p)
    return 0;

  /* Skip whitespace between the parsed time and the zone, if any.  */
  while (ISSPACE (*p))
    ++p;

  if (!*p
      || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
      || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
    return 1;
  else
    return 0;
}
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2068 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as I would like it to be.  Being based on
   strptime, it always allows shortened months, one-digit days, etc.,
   but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   I've investigated free and PD code for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
   distributed with phttpd, is excellent, but we cannot use it because
   it is not assigned to the FSF.  So I stuck it with strptime.  */

time_t
http_atotm (char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",		/* RFC1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",		/* RFC850:  Thursday, 29-Jan-98 22:12:57 */
    "%a, %d-%b-%Y %T",		/* pseudo-RFC850:  Thu, 29-Jan-1998 22:12:57
				   (google.com uses this for their cookies.) */
    "%a %b %d %T %Y"		/* asctime: Thu Jan 29 22:12:57 1998 */
  };

  int i;
  struct tm t;

  /* Note that under foreign locales Solaris strptime() fails to
     recognize English dates, which renders this function useless.  We
     solve this by being careful not to affect LC_TIME when
     initializing locale.

     Another solution would be to temporarily set locale to C, invoke
     strptime(), and restore it back.  This is slow and dirty,
     however, and locale support other than LC_MESSAGES can mess other
     things, so I rather chose to stick with just setting LC_MESSAGES.

     GNU strptime does not have this problem because it recognizes
     both international and local dates.  */

  for (i = 0; i < ARRAY_SIZE (time_formats); i++)
    {
      /* Start every attempt from a clean slate, so that fields written
         by a previous, failed strptime() call cannot leak into the
         result.  This also initializes tm_isdst to 0, which according
         to Roger Beeman strptime won't do.  */
      memset (&t, 0, sizeof (t));

      if (check_end (strptime (time_string, time_formats[i], &t)))
        return mktime_from_utc (&t);
    }

  /* All formats have failed.  */
  return -1;
}
/* Authorization support: We support two authorization schemes:

   * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;

   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
   consisting of answering to the server's challenge with the proper
   MD5 digests.  */
/* How many bytes it will take to store LEN bytes in base64.  */
#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))

/* Encode the LENGTH bytes starting at S to base64 format and place
   the result in STORE.  STORE will be 0-terminated, and must point to
   a writable buffer of at least 1+BASE64_LENGTH(length) bytes.

   Exactly LENGTH bytes of S are read; S does not have to be
   0-terminated.  Bytes are treated as unsigned, so input with the
   high bit set is encoded correctly.  A LENGTH of zero or less
   produces the empty string.  */
static void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  /* Index the table with unsigned bytes; plain `char' may be signed,
     which would turn bytes >= 0x80 into negative indices.  */
  const unsigned char *in = (const unsigned char *) s;
  char *p = store;
  int i;

  /* Transform each complete 3x8-bit group to 4x6 bits, as required by
     base64.  */
  for (i = 0; i + 2 < length; i += 3, in += 3)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *p++ = tbl[((in[1] & 0xf) << 2) | (in[2] >> 6)];
      *p++ = tbl[in[2] & 0x3f];
    }

  /* Encode the one or two leftover bytes and pad the group with `='.
     Missing bytes count as zero bits and are never read from S.  */
  if (i < length)
    {
      int have_second = (i + 1 < length);
      unsigned int b0 = in[0];
      unsigned int b1 = have_second ? in[1] : 0;

      *p++ = tbl[b0 >> 2];
      *p++ = tbl[((b0 & 3) << 4) | (b1 >> 4)];
      *p++ = have_second ? tbl[(b1 & 0xf) << 2] : '=';
      *p++ = '=';
    }

  /* ...and zero-terminate it.  */
  *p = '\0';
}
/* Create the authentication header contents for the `Basic' scheme.
   This is done by encoding the string `USER:PASS' in base64 and
   prepending `HEADER: Basic ' to it.

   The result is allocated with xmalloc, ends with CRLF, and is to be
   released by the caller.  */
static char *
basic_authentication_encode (const char *user, const char *passwd,
			     const char *header)
{
  char *t1, *t2, *res;
  int len1 = strlen (user) + 1 + strlen (passwd);
  int len2 = BASE64_LENGTH (len1);

  /* T1 holds the clear-text "USER:PASS", T2 its base64 encoding.  */
  t1 = (char *)alloca (len1 + 1);
  sprintf (t1, "%s:%s", user, passwd);
  t2 = (char *)alloca (1 + len2);
  base64_encode (t1, t2, len1);

  /* 11 == strlen (": Basic ") + strlen ("\r\n") + terminating zero.  */
  res = (char *)xmalloc (len2 + 11 + strlen (header));
  sprintf (res, "%s: Basic %s\r\n", header, t2);

  return res;
}
2269 /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
2270 of a field in such a header. If the field is the one specified by
2271 ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
2272 digest authorization code), extract its value in the (char*)
2273 variable pointed by RET. Returns negative on a malformed header,
2274 or number of bytes that have been parsed by this call. */
2276 extract_header_attr (const char *au, const char *attr_name, char **ret)
2278 const char *cp, *ep;
2282 if (strncmp (cp, attr_name, strlen (attr_name)) == 0)
2284 cp += strlen (attr_name);
2287 cp += skip_lws (cp);
2292 cp += skip_lws (cp);
2297 for (ep = cp; *ep && *ep != '\"'; ep++)
2302 *ret = strdupdelim (cp, ep);
2309 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
2310 an array of 16 bytes containing the hash keys, and BUF should be a
2311 buffer of 33 writable characters (32 for hex digits plus one for
2312 zero termination). */
2314 dump_hash (unsigned char *buf, const unsigned char *hash)
2318 for (i = 0; i < MD5_HASHLEN; i++, hash++)
2320 *buf++ = XDIGIT_TO_xchar (*hash >> 4);
2321 *buf++ = XDIGIT_TO_xchar (*hash & 0xf);
2326 /* Take the line apart to find the challenge, and compose a digest
2327 authorization header. See RFC2069 section 2.1.2. */
2329 digest_authentication_encode (const char *au, const char *user,
2330 const char *passwd, const char *method,
2333 static char *realm, *opaque, *nonce;
2338 { "realm", &realm },
2339 { "opaque", &opaque },
2344 realm = opaque = nonce = NULL;
2346 au += 6; /* skip over `Digest' */
2351 au += skip_lws (au);
2352 for (i = 0; i < ARRAY_SIZE (options); i++)
2354 int skip = extract_header_attr (au, options[i].name,
2355 options[i].variable);
2359 FREE_MAYBE (opaque);
2369 if (i == ARRAY_SIZE (options))
2371 while (*au && *au != '=')
2375 au += skip_lws (au);
2379 while (*au && *au != '\"')
2386 while (*au && *au != ',')
2391 if (!realm || !nonce || !user || !passwd || !path || !method)
2394 FREE_MAYBE (opaque);
2399 /* Calculate the digest value. */
2401 ALLOCA_MD5_CONTEXT (ctx);
2402 unsigned char hash[MD5_HASHLEN];
2403 unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
2404 unsigned char response_digest[MD5_HASHLEN * 2 + 1];
2406 /* A1BUF = H(user ":" realm ":" password) */
2408 gen_md5_update ((unsigned char *)user, strlen (user), ctx);
2409 gen_md5_update ((unsigned char *)":", 1, ctx);
2410 gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
2411 gen_md5_update ((unsigned char *)":", 1, ctx);
2412 gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
2413 gen_md5_finish (ctx, hash);
2414 dump_hash (a1buf, hash);
2416 /* A2BUF = H(method ":" path) */
2418 gen_md5_update ((unsigned char *)method, strlen (method), ctx);
2419 gen_md5_update ((unsigned char *)":", 1, ctx);
2420 gen_md5_update ((unsigned char *)path, strlen (path), ctx);
2421 gen_md5_finish (ctx, hash);
2422 dump_hash (a2buf, hash);
2424 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
2426 gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
2427 gen_md5_update ((unsigned char *)":", 1, ctx);
2428 gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
2429 gen_md5_update ((unsigned char *)":", 1, ctx);
2430 gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
2431 gen_md5_finish (ctx, hash);
2432 dump_hash (response_digest, hash);
2434 res = (char*) xmalloc (strlen (user)
2439 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
2440 + (opaque ? strlen (opaque) : 0)
2442 sprintf (res, "Authorization: Digest \
2443 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
2444 user, realm, nonce, path, response_digest);
2447 char *p = res + strlen (res);
2448 strcat (p, ", opaque=\"");
2452 strcat (res, "\r\n");
2456 #endif /* USE_DIGEST */
/* True if LINE starts with STRING_CONSTANT (compared without regard
   to case) and the match is followed by whitespace or by the end of
   the string.  STRING_CONSTANT must be a string literal, because its
   length is taken with sizeof.  */
#define BEGINS_WITH(line, string_constant)				\
  (!strncasecmp (line, string_constant, sizeof (string_constant) - 1)	\
   && (ISSPACE (line[sizeof (string_constant) - 1])			\
       || !line[sizeof (string_constant) - 1]))

/* Return non-zero if AU, the value of a `WWW-Authenticate' header,
   names one of the schemes `Basic', `Digest' or `NTLM'.  */
static int
known_authentication_scheme_p (const char *au)
{
  return BEGINS_WITH (au, "Basic")
    || BEGINS_WITH (au, "Digest")
    || BEGINS_WITH (au, "NTLM");
}

#undef BEGINS_WITH
2474 /* Create the HTTP authorization request header. When the
2475 `WWW-Authenticate' response header is seen, according to the
2476 authorization scheme specified in that header (`Basic' and `Digest'
2477 are supported by the current implementation), produce an
2478 appropriate HTTP authorization request header. */
2480 create_authorization_line (const char *au, const char *user,
2481 const char *passwd, const char *method,
2484 char *wwwauth = NULL;
2486 if (!strncasecmp (au, "Basic", 5))
2487 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
2488 if (!strncasecmp (au, "NTLM", 4))
2489 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
2491 else if (!strncasecmp (au, "Digest", 6))
2492 wwwauth = digest_authentication_encode (au, user, passwd, method, path);
2493 #endif /* USE_DIGEST */
2500 if (pc_last_host_ip)
2501 address_list_release (pc_last_host_ip);