2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
36 #if TIME_WITH_SYS_TIME
37 # include <sys/time.h>
41 # include <sys/time.h>
65 extern char *version_string;
75 #define TEXTHTML_S "text/html"
76 #define HTTP_ACCEPT "*/*"
78 /* Some status code validation macros: */
79 #define H_20X(x) (((x) >= 200) && ((x) < 300))
80 #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
81 #define H_REDIRECTED(x) (((x) == HTTP_STATUS_MOVED_PERMANENTLY) \
82 || ((x) == HTTP_STATUS_MOVED_TEMPORARILY))
84 /* HTTP/1.0 status codes from RFC1945, provided for reference. */
86 #define HTTP_STATUS_OK 200
87 #define HTTP_STATUS_CREATED 201
88 #define HTTP_STATUS_ACCEPTED 202
89 #define HTTP_STATUS_NO_CONTENT 204
90 #define HTTP_STATUS_PARTIAL_CONTENTS 206
92 /* Redirection 3xx. */
93 #define HTTP_STATUS_MULTIPLE_CHOICES 300
94 #define HTTP_STATUS_MOVED_PERMANENTLY 301
95 #define HTTP_STATUS_MOVED_TEMPORARILY 302
96 #define HTTP_STATUS_NOT_MODIFIED 304
98 /* Client error 4xx. */
99 #define HTTP_STATUS_BAD_REQUEST 400
100 #define HTTP_STATUS_UNAUTHORIZED 401
101 #define HTTP_STATUS_FORBIDDEN 403
102 #define HTTP_STATUS_NOT_FOUND 404
104 /* Server errors 5xx. */
105 #define HTTP_STATUS_INTERNAL 500
106 #define HTTP_STATUS_NOT_IMPLEMENTED 501
107 #define HTTP_STATUS_BAD_GATEWAY 502
108 #define HTTP_STATUS_UNAVAILABLE 503
111 /* Parse the HTTP status line, which is of format:
113 HTTP-Version SP Status-Code SP Reason-Phrase
115 The function returns the status-code, or -1 if the status line is
116 malformed. The pointer to reason-phrase is returned in REASON_PHRASE_PTR. */
118 parse_http_status_line (const char *line, const char **reason_phrase_ptr)
120 /* (the variables must not be named `major' and `minor', because
121 that breaks compilation with SunOS4 cc.) */
122 int mjr, mnr, statcode;
125 *reason_phrase_ptr = NULL;
127 /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is
128 major version, and Y is minor version. */
129 if (strncmp (line, "HTTP/", 5) != 0)
133 /* Calculate major HTTP version. */
135 for (mjr = 0; ISDIGIT (*line); line++)
136 mjr = 10 * mjr + (*line - '0');
137 if (*line != '.' || p == line)
141 /* Calculate minor HTTP version. */
143 for (mnr = 0; ISDIGIT (*line); line++)
144 mnr = 10 * mnr + (*line - '0');
145 if (*line != ' ' || p == line)
147 /* Wget will accept only 1.0 and higher HTTP-versions. The value of
148 minor version can be safely ignored. */
153 /* Calculate status code. */
154 if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2])))
156 statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0');
158 /* Set up the reason phrase pointer. */
160 /* RFC2068 requires SPC here, but we allow the string to finish
161 here, in case no reason-phrase is present. */
165 *reason_phrase_ptr = line;
170 *reason_phrase_ptr = line + 1;
175 /* Functions to be used as arguments to header_process(): */
177 struct http_process_range_closure {
183 /* Parse the `Content-Range' header and extract the information it
184 contains. Returns 1 if successful, -1 otherwise. */
186 http_process_range (const char *hdr, void *arg)
188 struct http_process_range_closure *closure
189 = (struct http_process_range_closure *)arg;
192 /* Certain versions of Nutscape proxy server send out
193 `Content-Range' without "bytes" specifier, which is a breach of
194 RFC2068 (as well as the HTTP/1.1 draft which was current at the
195 time). But hell, I must support it... */
196 if (!strncasecmp (hdr, "bytes", 5))
199 hdr += skip_lws (hdr);
205 for (num = 0; ISDIGIT (*hdr); hdr++)
206 num = 10 * num + (*hdr - '0');
207 if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
209 closure->first_byte_pos = num;
211 for (num = 0; ISDIGIT (*hdr); hdr++)
212 num = 10 * num + (*hdr - '0');
213 if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
215 closure->last_byte_pos = num;
217 for (num = 0; ISDIGIT (*hdr); hdr++)
218 num = 10 * num + (*hdr - '0');
219 closure->entity_length = num;
223 /* Place 1 to ARG if the HDR contains the word "none", 0 otherwise.
224 Used for `Accept-Ranges'. */
226 http_process_none (const char *hdr, void *arg)
228 int *where = (int *)arg;
230 if (strstr (hdr, "none"))
237 /* Place a malloc-ed copy of HDR, up to the first `;', in ARG. */
239 http_process_type (const char *hdr, void *arg)
241 char **result = (char **)arg;
242 /* Locate P on `;' or the terminating zero, whichever comes first. */
243 const char *p = strchr (hdr, ';');
245 p = hdr + strlen (hdr);
246 while (p > hdr && ISSPACE (*(p - 1)))
248 *result = strdupdelim (hdr, p);
252 /* Check whether the `Connection' header is set to "keep-alive". */
254 http_process_connection (const char *hdr, void *arg)
256 int *flag = (int *)arg;
257 if (!strcasecmp (hdr, "Keep-Alive"))
262 /* Persistent connections. Currently, we cache the most recently used
263 connection as persistent, provided that the HTTP server agrees to
264 make it such. The persistence data is stored in the variables
265 below. Ideally, it would be in a structure, and it should be
266 possible to cache an arbitrary fixed number of these connections.
268 I think the code is quite easy to extend in that direction. */
270 /* Whether a persistent connection is active. */
271 static int pc_active_p;
273 /* Host and port of currently active persistent connection. */
274 static unsigned char pc_last_host[4];
275 static unsigned short pc_last_port;
277 /* File descriptor of the currently active persistent connection. */
278 static int pc_last_fd;
280 /* Mark the persistent connection as invalid. This is used by the
281 CLOSE_* macros after they forcefully close a registered persistent
282 connection. This does not close the file descriptor -- it is left
283 to the caller to do that. (Maybe it should, though.) */
286 invalidate_persistent (void)
289 DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
292 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
293 persistent. This will enable someone to use the same connection
294 later. In the context of HTTP, this must be called only AFTER the
295 response has been received and the server has promised that the
296 connection will remain alive.
298 If a previous connection was persistent, it is closed. */
301 register_persistent (const char *host, unsigned short port, int fd)
307 if (pc_last_fd == fd)
309 /* The connection FD is already registered. Nothing to
315 /* The old persistent connection is still active; let's
316 close it first. This situation arises whenever a
317 persistent connection exists, but we then connect to a
318 different host, and try to register a persistent
319 connection to that one. */
321 invalidate_persistent ();
325 /* This store_hostaddress may not fail, because it has the results
327 success = store_hostaddress (pc_last_host, host);
332 DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
335 /* Return non-zero if a persistent connection is available for
336 connecting to HOST:PORT. */
339 persistent_available_p (const char *host, unsigned short port)
341 unsigned char this_host[4];
342 /* First, check whether a persistent connection is active at all. */
345 /* Second, check if the active connection pertains to the correct
346 (HOST, PORT) ordered pair. */
347 if (port != pc_last_port)
349 if (!store_hostaddress (this_host, host))
351 if (memcmp (pc_last_host, this_host, 4))
353 /* Third: check whether the connection is still open. This is
354 important because most servers implement a liberal (short) timeout
355 on persistent connections. Wget can of course always reconnect
356 if the connection doesn't work out, but it's nicer to know in
357 advance. This test is a logical followup of the first test, but
358 is "expensive" and therefore placed at the end of the list. */
359 if (!test_socket_open (pc_last_fd))
361 /* Oops, the socket is no longer open. Now that we know that,
362 let's invalidate the persistent connection before returning
365 invalidate_persistent ();
371 /* The idea behind these two CLOSE macros is to distinguish between
372 two cases: one when the job we've been doing is finished, and we
373 want to close the connection and leave, and two when something is
374 seriously wrong and we're closing the connection as part of
377 In case of keep_alive, CLOSE_FINISH should leave the connection
378 open, while CLOSE_INVALIDATE should still close it.
380 Note that the semantics of the flag `keep_alive' is "this
381 connection *will* be reused (the server has promised not to close
382 the connection once we're done)", while the semantics of
383 `pc_active_p && (fd) == pc_last_fd' is "we're *now* using an
384 active, registered connection". */
386 #define CLOSE_FINISH(fd) do { \
390 if (pc_active_p && (fd) == pc_last_fd) \
391 invalidate_persistent (); \
395 #define CLOSE_INVALIDATE(fd) do { \
397 if (pc_active_p && (fd) == pc_last_fd) \
398 invalidate_persistent (); \
404 long len; /* received length */
405 long contlen; /* expected length */
406 long restval; /* the restart value */
407 int res; /* the result of last read */
408 char *newloc; /* new location (redirection) */
409 char *remote_time; /* remote time-stamp string */
410 char *error; /* textual HTTP error */
411 int statcode; /* status code */
412 long dltime; /* time of the download */
415 /* Free the elements of hstat X. */
416 #define FREEHSTAT(x) do \
418 FREE_MAYBE ((x).newloc); \
419 FREE_MAYBE ((x).remote_time); \
420 FREE_MAYBE ((x).error); \
421 (x).newloc = (x).remote_time = (x).error = NULL; \
424 static char *create_authorization_line PARAMS ((const char *, const char *,
425 const char *, const char *,
427 static char *basic_authentication_encode PARAMS ((const char *, const char *,
429 static int known_authentication_scheme_p PARAMS ((const char *));
431 static time_t http_atotm PARAMS ((char *));
433 #define BEGINS_WITH(line, string_constant) \
434 (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
435 && (ISSPACE (line[sizeof (string_constant) - 1]) \
436 || !line[sizeof (string_constant) - 1]))
438 /* Retrieve a document through HTTP protocol. It recognizes status
439 code, and correctly handles redirections. It closes the network
440 socket. If it receives an error from the functions below it, it
441 will print it if there is enough information to do so (almost
442 always), returning the error to the caller (i.e. http_loop).
444 Various HTTP parameters are stored to hs. Although it parses the
445 response code correctly, it is not used in a sane way. The caller
448 If u->proxy is non-NULL, the URL u will be taken as a proxy URL,
449 and u->proxy->url will be given to the proxy server (bad naming,
452 gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
454 char *request, *type, *command, *path;
456 char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
457 char *authenticate_h;
461 char *request_keep_alive;
462 int sock, hcount, num_written, all_length, remport, statcode;
463 long contlen, contrange;
467 int auth_tried_already;
470 /* Whether this connection will be kept alive after the HTTP request
474 /* Flags that detect the two ways of specifying HTTP keep-alive
476 int http_keep_alive_1, http_keep_alive_2;
478 /* Whether keep-alive should be inhibited. */
479 int inhibit_keep_alive;
481 if (!(*dt & HEAD_ONLY))
482 /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
483 know the local filename so we can save to it. */
484 assert (u->local != NULL);
487 auth_tried_already = 0;
489 inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL);
492 /* We need to come back here when the initial attempt to retrieve
493 without authorization header fails. (Expected to happen at least
494 for the Digest authorization scheme.) */
497 http_keep_alive_1 = http_keep_alive_2 = 0;
499 /* Initialize certain elements of struct http_stat. */
504 hs->remote_time = NULL;
507 /* Which structure to use to retrieve the original URL data. */
513 /* First: establish the connection. */
514 if (inhibit_keep_alive
515 || !persistent_available_p (u->host, u->port))
517 logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
518 err = make_connection (&sock, u->host, u->port);
522 logputs (LOG_VERBOSE, "\n");
523 logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
527 logputs (LOG_VERBOSE, "\n");
528 logprintf (LOG_NOTQUIET, "socket: %s\n", strerror (errno));
532 logputs (LOG_VERBOSE, "\n");
533 logprintf (LOG_NOTQUIET,
534 _("Connection to %s:%hu refused.\n"), u->host, u->port);
538 logputs (LOG_VERBOSE, "\n");
539 logprintf (LOG_NOTQUIET, "connect: %s\n", strerror (errno));
544 /* Everything is fine! */
545 logputs (LOG_VERBOSE, _("connected!\n"));
554 logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
555 /* #### pc_last_fd should be accessed through an accessor
558 DEBUGP (("Reusing fd %d.\n", sock));
562 path = u->proxy->url;
566 command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
570 referer = (char *)alloca (9 + strlen (ou->referer) + 3);
571 sprintf (referer, "Referer: %s\r\n", ou->referer);
573 if (*dt & SEND_NOCACHE)
574 pragma_h = "Pragma: no-cache\r\n";
579 range = (char *)alloca (13 + numdigit (hs->restval) + 4);
580 /* Gag me! Some servers (e.g. WebSitePro) have been known to
581 respond to the following `Range' format by generating a
582 multipart/x-byte-ranges MIME document! This MIME type was
583 present in an old draft of the byteranges specification.
584 HTTP/1.1 specifies a multipart/byte-ranges MIME type, but
585 only if multiple non-overlapping ranges are requested --
586 which Wget never does. */
587 sprintf (range, "Range: bytes=%ld-\r\n", hs->restval);
592 STRDUP_ALLOCA (useragent, opt.useragent);
595 useragent = (char *)alloca (10 + strlen (version_string));
596 sprintf (useragent, "Wget/%s", version_string);
598 /* Construct the authentication, if userid is present. */
601 search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
602 user = user ? user : opt.http_user;
603 passwd = passwd ? passwd : opt.http_passwd;
610 /* We have the username and the password, but haven't tried
611 any authorization yet. Let's see if the "Basic" method
612 works. If not, we'll come back here and construct a
613 proper authorization method with the right challenges.
615 If we didn't employ this kind of logic, every URL that
616 requires authorization would have to be processed twice,
617 which is very suboptimal and generates a bunch of false
618 "unauthorized" errors in the server log.
620 #### But this logic also has a serious problem when used
621 with stronger authentications: we *first* transmit the
622 username and the password in clear text, and *then*
623 attempt a stronger authentication scheme. That cannot be
624 right! We are only fortunate that almost everyone still
625 uses the `Basic' scheme anyway.
627 There should be an option to prevent this from happening,
628 for those who use strong authentication schemes and value
630 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
634 wwwauth = create_authorization_line (authenticate_h, user, passwd,
642 char *proxy_user, *proxy_passwd;
643 /* For normal username and password, URL components override
644 command-line/wgetrc parameters. With proxy authentication,
645 it's the reverse, because proxy URLs are normally the
646 "permanent" ones, so command-line args should take
648 if (opt.proxy_user && opt.proxy_passwd)
650 proxy_user = opt.proxy_user;
651 proxy_passwd = opt.proxy_passwd;
655 proxy_user = u->user;
656 proxy_passwd = u->passwd;
658 /* #### This is junky. Can't the proxy request, say, `Digest'
660 if (proxy_user && proxy_passwd)
661 proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
662 "Proxy-Authorization");
667 /* String of the form :PORT. Used only for non-standard ports. */
671 port_maybe = (char *)alloca (numdigit (remport) + 2);
672 sprintf (port_maybe, ":%d", remport);
675 if (!inhibit_keep_alive)
676 request_keep_alive = "Connection: Keep-Alive\r\n";
678 request_keep_alive = NULL;
680 /* Allocate the memory for the request. */
681 request = (char *)alloca (strlen (command) + strlen (path)
684 + (port_maybe ? strlen (port_maybe) : 0)
685 + strlen (HTTP_ACCEPT)
686 + (request_keep_alive
687 ? strlen (request_keep_alive) : 0)
688 + (referer ? strlen (referer) : 0)
689 + (wwwauth ? strlen (wwwauth) : 0)
690 + (proxyauth ? strlen (proxyauth) : 0)
691 + (range ? strlen (range) : 0)
693 + (opt.user_header ? strlen (opt.user_header) : 0)
695 /* Construct the request. */
702 command, path, useragent, remhost,
703 port_maybe ? port_maybe : "",
705 request_keep_alive ? request_keep_alive : "",
706 referer ? referer : "",
707 wwwauth ? wwwauth : "",
708 proxyauth ? proxyauth : "",
711 opt.user_header ? opt.user_header : "");
712 DEBUGP (("---request begin---\n%s---request end---\n", request));
713 /* Free the temporary memory. */
714 FREE_MAYBE (wwwauth);
715 FREE_MAYBE (proxyauth);
717 /* Send the request to server. */
718 num_written = iwrite (sock, request, strlen (request));
721 logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
723 CLOSE_INVALIDATE (sock);
726 logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
727 u->proxy ? "Proxy" : "HTTP");
728 contlen = contrange = -1;
733 /* Before reading anything, initialize the rbuf. */
734 rbuf_initialize (&rbuf, sock);
738 /* Header-fetching loop. */
746 /* Get the header. */
747 status = header_get (&rbuf, &hdr,
748 /* Disallow continuations for status line. */
749 (hcount == 1 ? HG_NO_CONTINUATIONS : HG_NONE));
751 /* Check for errors. */
752 if (status == HG_EOF && *hdr)
754 /* This used to be an unconditional error, but that was
755 somewhat controversial, because of a large number of
756 broken CGI's that happily "forget" to send the second EOL
757 before closing the connection of a HEAD request.
759 So, the deal is to check whether the header is empty
760 (*hdr is zero if it is); if yes, it means that the
761 previous header was fully retrieved, and that -- most
762 probably -- the request is complete. "...be liberal in
763 what you accept." Oh boy. */
764 logputs (LOG_VERBOSE, "\n");
765 logputs (LOG_NOTQUIET, _("End of file while parsing headers.\n"));
768 FREE_MAYBE (hs->newloc);
769 FREE_MAYBE (all_headers);
770 CLOSE_INVALIDATE (sock);
773 else if (status == HG_ERROR)
775 logputs (LOG_VERBOSE, "\n");
776 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
780 FREE_MAYBE (hs->newloc);
781 FREE_MAYBE (all_headers);
782 CLOSE_INVALIDATE (sock);
786 /* If the headers are to be saved to a file later, save them to
788 if (opt.save_headers)
790 int lh = strlen (hdr);
791 all_headers = (char *)xrealloc (all_headers, all_length + lh + 2);
792 memcpy (all_headers + all_length, hdr, lh);
794 all_headers[all_length++] = '\n';
795 all_headers[all_length] = '\0';
798 /* Print the header if requested. */
799 if (opt.server_response && hcount != 1)
800 logprintf (LOG_VERBOSE, "\n%d %s", hcount, hdr);
802 /* Check for status line. */
806 /* Parse the first line of server response. */
807 statcode = parse_http_status_line (hdr, &error);
808 hs->statcode = statcode;
809 /* Store the descriptive response. */
810 if (statcode == -1) /* malformed response */
812 /* A common reason for "malformed response" error is the
813 case when no data was actually received. Handle this
816 hs->error = xstrdup (_("No data received"));
818 hs->error = xstrdup (_("Malformed status line"));
823 hs->error = xstrdup (_("(no description)"));
825 hs->error = xstrdup (error);
832 logprintf (LOG_VERBOSE, "%d %s", statcode, error);
837 /* Exit on empty header. */
844 /* Try getting content-length. */
845 if (contlen == -1 && !opt.ignore_length)
846 if (header_process (hdr, "Content-Length", header_extract_number,
849 /* Try getting content-type. */
851 if (header_process (hdr, "Content-Type", http_process_type, &type))
853 /* Try getting location. */
855 if (header_process (hdr, "Location", header_strdup, &hs->newloc))
857 /* Try getting last-modified. */
858 if (!hs->remote_time)
859 if (header_process (hdr, "Last-Modified", header_strdup,
862 /* Try getting www-authentication. */
864 if (header_process (hdr, "WWW-Authenticate", header_strdup,
867 /* Check for accept-ranges header. If it contains the word
868 `none', disable the ranges. */
869 if (*dt & ACCEPTRANGES)
872 if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep))
875 *dt &= ~ACCEPTRANGES;
879 /* Try getting content-range. */
882 struct http_process_range_closure closure;
883 if (header_process (hdr, "Content-Range", http_process_range, &closure))
885 contrange = closure.first_byte_pos;
889 /* Check for keep-alive related responses. */
890 if (!inhibit_keep_alive)
892 /* Check for the `Keep-Alive' header. */
893 if (!http_keep_alive_1)
895 if (header_process (hdr, "Keep-Alive", header_exists,
899 /* Check for `Connection: Keep-Alive'. */
900 if (!http_keep_alive_2)
902 if (header_process (hdr, "Connection", http_process_connection,
911 logputs (LOG_VERBOSE, "\n");
914 && (http_keep_alive_1 || http_keep_alive_2))
916 assert (inhibit_keep_alive == 0);
920 /* The server has promised that it will not close the connection
921 when we're done. This means that we can register it. */
922 register_persistent (u->host, u->port, sock);
924 if ((statcode == HTTP_STATUS_UNAUTHORIZED)
927 /* Authorization is required. */
932 if (auth_tried_already)
934 /* If we have tried it already, then there is no point
937 logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
938 free (authenticate_h);
941 else if (!known_authentication_scheme_p (authenticate_h))
943 free (authenticate_h);
944 logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
947 else if (BEGINS_WITH (authenticate_h, "Basic"))
949 /* The authentication scheme is basic, the one we try by
950 default, and it failed. There's no sense in trying
956 auth_tried_already = 1;
960 /* We do not need this anymore. */
963 free (authenticate_h);
964 authenticate_h = NULL;
967 /* 20x responses are counted among successful by default. */
968 if (H_20X (statcode))
971 if (type && !strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)))
974 /* We don't assume text/html by default. */
977 if (opt.html_extension && (*dt & TEXTHTML))
978 /* -E / --html-extension / html_extension = on was specified, and this is a
979 text/html file. If some case-insensitive variation on ".htm[l]" isn't
980 already the file's suffix, tack on ".html". */
982 char* last_period_in_local_filename = strrchr(u->local, '.');
984 if (last_period_in_local_filename == NULL ||
985 !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
986 strcasecmp(last_period_in_local_filename, ".html") == EQ))
988 size_t local_filename_len = strlen(u->local);
990 u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
991 strcpy(u->local + local_filename_len, ".html");
993 *dt |= ADDED_HTML_EXTENSION;
999 else if (contrange != hs->restval ||
1000 (H_PARTIAL (statcode) && contrange == -1))
1002 /* This means the whole request was somehow misunderstood by the
1003 server. Bail out. */
1005 FREE_MAYBE (hs->newloc);
1006 FREE_MAYBE (all_headers);
1007 CLOSE_INVALIDATE (sock);
1014 contlen += contrange;
1016 contrange = -1; /* If content-length was not sent,
1017 content-range will be ignored. */
1019 hs->contlen = contlen;
1021 /* Return if redirected. */
1022 if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
1024 /* RFC2068 says that in case of the 300 (multiple choices)
1025 response, the server can output a preferred URL through
1026 `Location' header; otherwise, the request should be treated
1027 like GET. So, if the location is set, it will be a
1028 redirection; otherwise, just proceed normally. */
1029 if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
1033 logprintf (LOG_VERBOSE,
1034 _("Location: %s%s\n"),
1035 hs->newloc ? hs->newloc : _("unspecified"),
1036 hs->newloc ? _(" [following]") : "");
1037 CLOSE_FINISH (sock);
1039 FREE_MAYBE (all_headers);
1045 if ((*dt & RETROKF) && !opt.server_response)
1047 /* No need to print this output if the body won't be
1048 downloaded at all, or if the original server response is
1050 logputs (LOG_VERBOSE, _("Length: "));
1053 logputs (LOG_VERBOSE, legible (contlen));
1054 if (contrange != -1)
1055 logprintf (LOG_VERBOSE, _(" (%s to go)"),
1056 legible (contlen - contrange));
1059 logputs (LOG_VERBOSE,
1060 opt.ignore_length ? _("ignored") : _("unspecified"));
1062 logprintf (LOG_VERBOSE, " [%s]\n", type);
1064 logputs (LOG_VERBOSE, "\n");
1068 type = NULL; /* We don't need it any more. */
1070 /* Return if we have no intention of further downloading. */
1071 if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
1073 /* In case someone cares to look... */
1077 FREE_MAYBE (all_headers);
1078 CLOSE_FINISH (sock);
1079 return RETRFINISHED;
1082 /* Open the local file. */
1085 mkalldirs (u->local);
1087 rotate_backups (u->local);
1088 fp = fopen (u->local, hs->restval ? "ab" : "wb");
1091 logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
1092 CLOSE_FINISH (sock);
1093 FREE_MAYBE (all_headers);
1102 /* This will silently fail for streams that don't correspond
1103 to regular files, but that's OK. */
1109 /* #### This confuses the code that checks for file size. There
1110 should be some overhead information. */
1111 if (opt.save_headers)
1112 fwrite (all_headers, 1, all_length, fp);
1114 /* Get the contents of the document. */
1115 hs->res = get_contents (sock, fp, &hs->len, hs->restval,
1116 (contlen != -1 ? contlen : 0),
1118 hs->dltime = elapsed_time ();
1120 /* Close or flush the file. We have to be careful to check for
1121 error here. Checking the result of fwrite() is not enough --
1122 errors could go unnoticed! */
1125 flush_res = fclose (fp);
1127 flush_res = fflush (fp);
1128 if (flush_res == EOF)
1131 FREE_MAYBE (all_headers);
1132 CLOSE_FINISH (sock);
1135 return RETRFINISHED;
1138 /* The genuine HTTP loop! This is the part where the retrieval is
1139 retried, and retried, and retried, and... */
1141 http_loop (struct urlinfo *u, char **newloc, int *dt)
1143 static int first_retrieval = 1;
1146 int use_ts, got_head = 0; /* time-stamping info */
1147 char *filename_plus_orig_suffix;
1148 char *local_filename = NULL;
1149 char *tms, *suf, *locf, *tmrate;
1151 time_t tml = -1, tmr = -1; /* local and remote time-stamps */
1152 long local_size = 0; /* the size of the local file */
1153 size_t filename_len;
1154 struct http_stat hstat; /* HTTP status */
1159 /* Warn on (likely bogus) wildcard usage in HTTP. Don't use
1160 has_wildcards_p because it would also warn on `?', and we know that
1161 shows up in CGI paths a *lot*. */
1162 if (strchr (u->url, '*'))
1163 logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
1165 /* Determine the local filename. */
1167 u->local = url_filename (u->proxy ? u->proxy : u);
1169 if (!opt.output_document)
1172 locf = opt.output_document;
1174 /* Yuck. Multiple returns suck. We need to remember to free() the space we
1175 xmalloc() here before EACH return. This is one reason it's better to set
1176 flags that influence flow control and then return once at the end. */
1177 filename_len = strlen(u->local);
1178 filename_plus_orig_suffix = xmalloc(filename_len + sizeof(".orig"));
1180 if (opt.noclobber && file_exists_p (u->local))
1182 /* If opt.noclobber is turned on and file already exists, do not
1183 retrieve the file */
1184 logprintf (LOG_VERBOSE, _("\
1185 File `%s' already there, will not retrieve.\n"), u->local);
1186 /* If the file is there, we suppose it's retrieved OK. */
1189 /* #### Bogusness alert. */
1190 /* If its suffix is "html" or (yuck!) "htm", we suppose it's
1191 text/html, a harmless lie. */
1192 if (((suf = suffix (u->local)) != NULL)
1193 && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
1196 free(filename_plus_orig_suffix); /* must precede every return! */
1197 /* Another harmless lie: */
1202 if (opt.timestamping)
1204 boolean local_dot_orig_file_exists = FALSE;
1206 if (opt.backup_converted)
1207 /* If -K is specified, we'll act on the assumption that it was specified
1208 last time these files were downloaded as well, and instead of just
1209 comparing local file X against server file X, we'll compare local
1210 file X.orig (if extant, else X) against server file X. If -K
1211 _wasn't_ specified last time, or the server contains files called
1212 *.orig, -N will be back to not operating correctly with -k. */
1214 /* Would a single s[n]printf() call be faster? --dan
1216 It wouldn't. sprintf() is horribly slow. At one point I
1217 profiled Wget, and found that a measurable and
1218 non-negligible amount of time was lost calling sprintf()
1219 in url.c. Replacing sprintf with inline calls to
1220 strcpy() and long_to_string() made a difference.
1222 strcpy(filename_plus_orig_suffix, u->local);
1223 strcpy(filename_plus_orig_suffix + filename_len, ".orig");
1225 /* Try to stat() the .orig file. */
1226 if (stat(filename_plus_orig_suffix, &st) == 0)
1228 local_dot_orig_file_exists = TRUE;
1229 local_filename = filename_plus_orig_suffix;
1233 if (!local_dot_orig_file_exists)
1234 /* Couldn't stat() <file>.orig, so try to stat() <file>. */
1235 if (stat (u->local, &st) == 0)
1236 local_filename = u->local;
1238 if (local_filename != NULL)
1239 /* There was a local file, so we'll check later to see if the version
1240 the server has is the same version we already have, allowing us to
1245 local_size = st.st_size;
1249 /* Reset the counter. */
1251 *dt = 0 | ACCEPTRANGES;
1255 /* Increment the pass counter. */
1257 /* Wait before the retrieval (unless this is the very first
1259 Check if we are retrying or not, wait accordingly - HEH */
1260 if (!first_retrieval && (opt.wait || (count && opt.waitretry)))
1264 if (count<opt.waitretry)
1267 sleep(opt.waitretry);
1272 if (first_retrieval)
1273 first_retrieval = 0;
1274 /* Get the current time string. */
1275 tms = time_str (NULL);
1276 /* Print fetch message, if opt.verbose. */
1279 char *hurl = str_url (u->proxy ? u->proxy : u, 1);
1283 sprintf (tmp, _("(try:%2d)"), count);
1284 logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
1285 tms, hurl, tmp, locf);
1287 ws_changetitle (hurl, 1);
1292 /* Default document type is empty. However, if spider mode is
1293 on or time-stamping is employed, the HEAD_ONLY command is
1294 encoded within *dt. */
1295 if (opt.spider || (use_ts && !got_head))
1299 /* Assume no restarting. */
1301 /* Decide whether or not to restart. */
1302 if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
1303 && file_exists_p (u->local))
1304 if (stat (u->local, &st) == 0)
1305 hstat.restval = st.st_size;
1306 /* Decide whether to send the no-cache directive. */
1307 if (u->proxy && (count > 1 || (opt.proxy_cache == 0)))
1308 *dt |= SEND_NOCACHE;
1310 *dt &= ~SEND_NOCACHE;
1312 /* Try fetching the document, or at least its head. :-) */
1313 err = gethttp (u, &hstat, dt);
1315 /* It's unfortunate that wget determines the local filename before finding
1316 out the Content-Type of the file. Barring a major restructuring of the
1317 code, we need to re-set locf here, since gethttp() may have xrealloc()d
1318 u->local to tack on ".html". */
1319 if (!opt.output_document)
1322 locf = opt.output_document;
1325 tms = time_str (NULL);
1326 /* Get the new location (with or without the redirection). */
1328 *newloc = xstrdup (hstat.newloc);
1331 case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
1332 case CONERROR: case READERR: case WRITEFAILED:
1334 /* Non-fatal errors continue executing the loop, which will
1335 bring them to "while" statement at the end, to judge
1336 whether the number of tries was exceeded. */
1338 printwhat (count, opt.ntry);
1341 case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
1342 /* Fatal errors just return from the function. */
1344 free(filename_plus_orig_suffix); /* must precede every return! */
1347 case FWRITEERR: case FOPENERR:
1348 /* Another fatal error. */
1349 logputs (LOG_VERBOSE, "\n");
1350 logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
1351 u->local, strerror (errno));
1353 free(filename_plus_orig_suffix); /* must precede every return! */
1357 /* Return the new location to the caller. */
1360 logprintf (LOG_NOTQUIET,
1361 _("ERROR: Redirection (%d) without location.\n"),
1363 free(filename_plus_orig_suffix); /* must precede every return! */
1367 free(filename_plus_orig_suffix); /* must precede every return! */
1371 /* Deal with you later. */
1374 /* All possibilities should have been exhausted. */
1377 if (!(*dt & RETROKF))
1381 /* #### Ugly ugly ugly! */
1382 char *hurl = str_url (u->proxy ? u->proxy : u, 1);
1383 logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
1386 logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
1387 tms, hstat.statcode, hstat.error);
1388 logputs (LOG_VERBOSE, "\n");
1390 free(filename_plus_orig_suffix); /* must precede every return! */
1394 /* Did we get the time-stamp? */
1397 if (opt.timestamping && !hstat.remote_time)
1399 logputs (LOG_NOTQUIET, _("\
1400 Last-modified header missing -- time-stamps turned off.\n"));
1402 else if (hstat.remote_time)
1404 /* Convert the date-string into struct tm. */
1405 tmr = http_atotm (hstat.remote_time);
1406 if (tmr == (time_t) (-1))
1407 logputs (LOG_VERBOSE, _("\
1408 Last-modified header invalid -- time-stamp ignored.\n"));
1412 /* The time-stamping section. */
1417 use_ts = 0; /* no more time-stamping */
1418 count = 0; /* the retrieve count for HEAD is
1420 if (hstat.remote_time && tmr != (time_t) (-1))
1422 /* Now time-stamping can be used validly. Time-stamping
1423 means that if the sizes of the local and remote file
1424 match, and local file is newer than the remote file,
1425 it will not be retrieved. Otherwise, the normal
1426 download procedure is resumed. */
1428 (hstat.contlen == -1 || local_size == hstat.contlen))
1430 logprintf (LOG_VERBOSE, _("\
1431 Server file no newer than local file `%s' -- not retrieving.\n\n"),
1434 free(filename_plus_orig_suffix);/*must precede every return!*/
1437 else if (tml >= tmr)
1438 logprintf (LOG_VERBOSE, _("\
1439 The sizes do not match (local %ld) -- retrieving.\n"), local_size);
1441 logputs (LOG_VERBOSE,
1442 _("Remote file is newer, retrieving.\n"));
1448 && (tmr != (time_t) (-1))
1450 && ((hstat.len == hstat.contlen) ||
1451 ((hstat.res == 0) &&
1452 ((hstat.contlen == -1) ||
1453 (hstat.len >= hstat.contlen && !opt.kill_longer)))))
1455 touch (u->local, tmr);
1457 /* End of time-stamping section. */
1461 logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
1462 free(filename_plus_orig_suffix); /* must precede every return! */
1466 /* It is now safe to free the remainder of hstat, since the
1467 strings within it will no longer be used. */
1470 tmrate = rate (hstat.len - hstat.restval, hstat.dltime);
1472 if (hstat.len == hstat.contlen)
1476 logprintf (LOG_VERBOSE,
1477 _("%s (%s) - `%s' saved [%ld/%ld]\n\n"),
1478 tms, tmrate, locf, hstat.len, hstat.contlen);
1479 logprintf (LOG_NONVERBOSE,
1480 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
1481 tms, u->url, hstat.len, hstat.contlen, locf, count);
1484 downloaded_increase (hstat.len);
1486 /* Remember that we downloaded the file for later ".orig" code. */
1487 if (*dt & ADDED_HTML_EXTENSION)
1488 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1490 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1492 free(filename_plus_orig_suffix); /* must precede every return! */
1495 else if (hstat.res == 0) /* No read error */
1497 if (hstat.contlen == -1) /* We don't know how much we were supposed
1498 to get, so assume we succeeded. */
1502 logprintf (LOG_VERBOSE,
1503 _("%s (%s) - `%s' saved [%ld]\n\n"),
1504 tms, tmrate, locf, hstat.len);
1505 logprintf (LOG_NONVERBOSE,
1506 "%s URL:%s [%ld] -> \"%s\" [%d]\n",
1507 tms, u->url, hstat.len, locf, count);
1510 downloaded_increase (hstat.len);
1512 /* Remember that we downloaded the file for later ".orig" code. */
1513 if (*dt & ADDED_HTML_EXTENSION)
1514 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1516 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1518 free(filename_plus_orig_suffix); /* must precede every return! */
1521 else if (hstat.len < hstat.contlen) /* meaning we lost the
1522 connection too soon */
1524 logprintf (LOG_VERBOSE,
1525 _("%s (%s) - Connection closed at byte %ld. "),
1526 tms, tmrate, hstat.len);
1527 printwhat (count, opt.ntry);
1530 else if (!opt.kill_longer) /* meaning we got more than expected */
1532 logprintf (LOG_VERBOSE,
1533 _("%s (%s) - `%s' saved [%ld/%ld])\n\n"),
1534 tms, tmrate, locf, hstat.len, hstat.contlen);
1535 logprintf (LOG_NONVERBOSE,
1536 "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
1537 tms, u->url, hstat.len, hstat.contlen, locf, count);
1539 downloaded_increase (hstat.len);
1541 /* Remember that we downloaded the file for later ".orig" code. */
1542 if (*dt & ADDED_HTML_EXTENSION)
1543 downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
1545 downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
1547 free(filename_plus_orig_suffix); /* must precede every return! */
1550 else /* the same, but not accepted */
1552 logprintf (LOG_VERBOSE,
1553 _("%s (%s) - Connection closed at byte %ld/%ld. "),
1554 tms, tmrate, hstat.len, hstat.contlen);
1555 printwhat (count, opt.ntry);
1559 else /* now hstat.res can only be -1 */
1561 if (hstat.contlen == -1)
1563 logprintf (LOG_VERBOSE,
1564 _("%s (%s) - Read error at byte %ld (%s)."),
1565 tms, tmrate, hstat.len, strerror (errno));
1566 printwhat (count, opt.ntry);
1569 else /* hstat.res == -1 and contlen is given */
1571 logprintf (LOG_VERBOSE,
1572 _("%s (%s) - Read error at byte %ld/%ld (%s). "),
1573 tms, tmrate, hstat.len, hstat.contlen,
1575 printwhat (count, opt.ntry);
1582 while (!opt.ntry || (count < opt.ntry));
1583 free(filename_plus_orig_suffix); /* must precede every return! */
1587 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
1588 than local timezone (mktime assumes the latter).
1590 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
1591 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
1593 mktime_from_utc (struct tm *t)
/* Break TL down as UTC with gmtime() and feed that straight back to
   mktime(), which interprets its argument as local time.  TB
   therefore differs from TL by the local zone's offset.  */
1600 tb = mktime (gmtime (&tl));
/* Both arms evaluate to 2*TL - TB, i.e. TL corrected by that offset.
   Each arm only ever subtracts the smaller value from the larger
   one.  */
1601 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
/* Check whether the result of strptime() indicates success.
   strptime() returns the pointer to how far it got to in the string.
   The processing has been successful if the string is at `GMT', at a
   signed numeric zone (`+X' or `-X'), or at the end of the string.

   In extended regexp parlance, the function returns 1 if P matches
   "^ *(GMT|[+-][0-9]|$)", 0 otherwise.  P being NULL (a valid result of
   strptime()) is considered a failure and 0 is returned.  */
static int
check_end (const char *p)
{
  if (!p)
    return 0;
  while (ISSPACE (*p))
    ++p;
  /* The sign of a numeric zone offset is the first character after
     the whitespace, so both `+' and `-' are tested at p[0]; the digit
     that must follow is at p[1].  */
  if (!*p
      || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
      || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
    return 1;
  return 0;
}
1627 /* Convert TIME_STRING time to time_t. TIME_STRING can be in any of
1628 the three formats RFC2068 allows the HTTP servers to emit --
1629 RFC1123-date, RFC850-date or asctime-date. Timezones are ignored,
1632 We use strptime() to recognize various dates, which makes it a
1633 little bit slacker than the RFC1123/RFC850/asctime (e.g. it always
1634 allows shortened dates and months, one-digit days, etc.). It also
1635 allows more than one space anywhere where the specs require one SP.
1636 The routine should probably be even more forgiving (as recommended
1637 by RFC2068), but I do not have the time to write one.
1639 Return the computed time_t representation, or -1 if all the
1642 Needless to say, what we *really* need here is something like
1643 Marcus Hennecke's atotm(), which is forgiving, fast, to-the-point,
1644 and does not use strptime(). atotm() is to be found in the sources
1645 of `phttpd', a little-known HTTP server written by Peter Erikson. */
1647 http_atotm (char *time_string)
1651 /* Roger Beeman says: "This function dynamically allocates struct tm
1652 t, but does no initialization. The only field that actually
1653 needs initialization is tm_isdst, since the others will be set by
1654 strptime. Since strptime does not set tm_isdst, it will return
1655 the data structure with whatever data was in tm_isdst to begin
1656 with. For those of us in timezones where DST can occur, there
1657 can be a one hour shift depending on the previous contents of the
1658 data area where the data structure is allocated." */
1661 /* Note that under foreign locales Solaris strptime() fails to
1662 recognize English dates, which renders this function useless. I
1663 assume that other non-GNU strptime's are plagued by the same
1664 disease. We solve this by setting only LC_MESSAGES in
1665 i18n_initialize(), instead of LC_ALL.
1667 Another solution could be to temporarily set locale to C, invoke
1668 strptime(), and restore it back. This is slow and dirty,
1669 however, and locale support other than LC_MESSAGES can mess other
1670 things, so I rather chose to stick with just setting LC_MESSAGES.
1672 Also note that none of this is necessary under GNU strptime(),
1673 because it recognizes both international and local dates. */
1675 /* NOTE: We don't use `%n' for white space, as OSF's strptime uses
1676 it to eat all white space up to (and including) a newline, and
1677 the function fails if there is no newline (!).
1679 Let's hope all strptime() implementations use ` ' to skip *all*
1680 whitespace instead of just one (it works that way on all the
1681 systems I've tested it on). */
/* Try the three date layouts in order.  Each attempt parses into the
   same T; the first one for which check_end() accepts the position
   where strptime() stopped is converted with mktime_from_utc() and
   returned immediately.  */
1683 /* RFC1123: Thu, 29 Jan 1998 22:12:57 */
1684 if (check_end (strptime (time_string, "%a, %d %b %Y %T", &t)))
1685 return mktime_from_utc (&t);
1686 /* RFC850: Thu, 29-Jan-98 22:12:57 */
1687 if (check_end (strptime (time_string, "%a, %d-%b-%y %T", &t)))
1688 return mktime_from_utc (&t);
1689 /* asctime: Thu Jan 29 22:12:57 1998 */
1690 if (check_end (strptime (time_string, "%a %b %d %T %Y", &t)))
1691 return mktime_from_utc (&t);
1696 /* Authorization support: We support two authorization schemes:
1698 * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
1700 * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
1701 consisting of answering to the server's challenge with the proper
1704 /* How many bytes it will take to store LEN bytes in base64. */
/* LEN is rounded up to a whole number of 3-byte groups, each of which
   yields 4 output characters.  The result does not include room for
   a terminating NUL.  */
1705 #define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))
/* Encode the string S of length LENGTH to base64 format and place it
   to STORE.  STORE will be 0-terminated, and must point to a writable
   buffer of at least 1+BASE64_LENGTH(length) bytes.

   Exactly LENGTH bytes of S are read; S need not be 0-terminated and
   may contain bytes with the high bit set.  A LENGTH of zero (or
   less) produces the empty string.  */
void
base64_encode (const char *s, char *store, int length)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  /* Index the table with unsigned bytes: plain `char' may be signed,
     in which case a byte >= 0x80 would shift to a negative index.  */
  const unsigned char *in = (const unsigned char *) s;
  char *out = store;
  int left = length;

  /* Transform each complete 3x8-bit group to 4x6 bits, as required by
     base64.  */
  while (left >= 3)
    {
      *out++ = tbl[in[0] >> 2];
      *out++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *out++ = tbl[((in[1] & 0xf) << 2) | (in[2] >> 6)];
      *out++ = tbl[in[2] & 0x3f];
      in += 3;
      left -= 3;
    }

  /* Encode the one or two leftover bytes and pad with `='.  Only the
     bytes that really exist are read, so nothing past S + LENGTH
     leaks into the output.  */
  if (left == 2)
    {
      *out++ = tbl[in[0] >> 2];
      *out++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *out++ = tbl[(in[1] & 0xf) << 2];
      *out++ = '=';
    }
  else if (left == 1)
    {
      *out++ = tbl[in[0] >> 2];
      *out++ = tbl[(in[0] & 3) << 4];
      *out++ = '=';
      *out++ = '=';
    }

  /* ...and zero-terminate it.  */
  *out = '\0';
}
/* Create the authentication header contents for the `Basic' scheme.
   This is done by encoding the string `USER:PASS' in base64 and
   prepending `HEADER: Basic ' to it.

   The result, "HEADER: Basic <base64>\r\n", is heap-allocated and
   returned to the caller; this function does not free it.  */
static char *
basic_authentication_encode (const char *user, const char *passwd,
			     const char *header)
{
  char *t1, *t2, *res;
  int len1 = strlen (user) + 1 + strlen (passwd);
  int len2 = BASE64_LENGTH (len1);

  t1 = (char *)alloca (len1 + 1);
  sprintf (t1, "%s:%s", user, passwd);
  t2 = (char *)alloca (1 + len2);
  base64_encode (t1, t2, len1);
  /* 11 == strlen (": Basic ") + strlen ("\r\n") + 1 for the NUL.
     Allocate with xmalloc(), as digest_authentication_encode() does,
     rather than writing through an unchecked malloc() result.  */
  res = (char *)xmalloc (len2 + 11 + strlen (header));
  sprintf (res, "%s: Basic %s\r\n", header, t2);

  return res;
}
1767 /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
1768 of a field in such a header. If the field is the one specified by
1769 ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
1770 digest authorization code), extract its value in the (char*)
1771 variable pointed by RET. Returns negative on a malformed header,
1772 or number of bytes that have been parsed by this call. */
1774 extract_header_attr (const char *au, const char *attr_name, char **ret)
1776 const char *cp, *ep;
/* Only act when the field starts with ATTR_NAME; the comparison is a
   case-sensitive prefix match.  */
1780 if (strncmp (cp, attr_name, strlen (attr_name)) == 0)
/* Step over the attribute name itself.  */
1782 cp += strlen (attr_name);
/* Advance CP by the amount skip_lws() reports for the text at CP.  */
1785 cp += skip_lws (cp);
1790 cp += skip_lws (cp);
/* Move EP forward from CP until it reaches a double quote or the end
   of the string, whichever comes first.  */
1795 for (ep = cp; *ep && *ep != '\"'; ep++)
/* Hand the text delimited by CP and EP back through *RET.
   NOTE(review): strdupdelim() is defined elsewhere; presumably it
   returns a newly allocated copy -- confirm who frees it.  */
1800 *ret = strdupdelim (cp, ep);
1807 /* Response value needs to be in lowercase, so we cannot use HEXD2ASC
1808 from url.h. See RFC 2069 2.1.2 for the syntax of response-digest. */
/* Values below 10 map to '0'..'9', the rest to 'a' onwards.  X is
   evaluated more than once.  NOTE(review): the callers visible here
   only pass 4-bit values (0..15).  */
1809 #define HEXD2asc(x) (((x) < 10) ? ((x) + '0') : ((x) - 10 + 'a'))
1811 /* Dump the hexadecimal representation of HASH to BUF. HASH should be
1812 an array of 16 bytes containing the hash keys, and BUF should be a
1813 buffer of 33 writable characters (32 for hex digits plus one for
1814 zero termination). */
1816 dump_hash (unsigned char *buf, const unsigned char *hash)
/* Walk the MD5_HASHLEN bytes of HASH, emitting two characters into
   BUF for each byte.  */
1820 for (i = 0; i < MD5_HASHLEN; i++, hash++)
/* High nibble first, then low nibble, each as a lowercase hex
   digit.  */
1822 *buf++ = HEXD2asc (*hash >> 4);
1823 *buf++ = HEXD2asc (*hash & 0xf);
1828 /* Take the line apart to find the challenge, and compose a digest
1829 authorization header. See RFC2069 section 2.1.2. */
1831 digest_authentication_encode (const char *au, const char *user,
1832 const char *passwd, const char *method,
/* NOTE(review): the parsed challenge fields have static storage, so
   they are shared between calls; they are reset to NULL below before
   the challenge is parsed.  */
1835 static char *realm, *opaque, *nonce;
/* Table pairing each attribute name with the variable that receives
   its value.  */
1840 { "realm", &realm },
1841 { "opaque", &opaque },
1846 realm = opaque = nonce = NULL;
1848 au += 6; /* skip over `Digest' */
1853 au += skip_lws (au);
/* Offer the field at AU to each table entry in turn.  */
1854 for (i = 0; i < ARRAY_SIZE (options); i++)
1856 int skip = extract_header_attr (au, options[i].name,
1857 options[i].variable);
1861 FREE_MAYBE (opaque);
/* The loop reached the end of the table -- presumably no entry
   claimed this field -- so scan past it: up to the `=', through the
   quoted value, and on to the `,' separator.  */
1871 if (i == ARRAY_SIZE (options))
1873 while (*au && *au != '=')
1877 au += skip_lws (au);
1881 while (*au && *au != '\"')
1888 while (*au && *au != ',')
/* All of these are used unconditionally below.  OPAQUE is not in the
   list because it is optional (see the `opaque ?' test where the
   result is sized).  */
1893 if (!realm || !nonce || !user || !passwd || !path || !method)
1896 FREE_MAYBE (opaque);
1901 /* Calculate the digest value. */
1904 unsigned char hash[MD5_HASHLEN];
1905 unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
1906 unsigned char response_digest[MD5_HASHLEN * 2 + 1];
1908 /* A1BUF = H(user ":" realm ":" password) */
1909 md5_init_ctx (&ctx);
1910 md5_process_bytes (user, strlen (user), &ctx);
1911 md5_process_bytes (":", 1, &ctx);
1912 md5_process_bytes (realm, strlen (realm), &ctx);
1913 md5_process_bytes (":", 1, &ctx);
1914 md5_process_bytes (passwd, strlen (passwd), &ctx);
1915 md5_finish_ctx (&ctx, hash);
1916 dump_hash (a1buf, hash);
1918 /* A2BUF = H(method ":" path) */
1919 md5_init_ctx (&ctx);
1920 md5_process_bytes (method, strlen (method), &ctx);
1921 md5_process_bytes (":", 1, &ctx);
1922 md5_process_bytes (path, strlen (path), &ctx);
1923 md5_finish_ctx (&ctx, hash);
1924 dump_hash (a2buf, hash);
1926 /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
1927 md5_init_ctx (&ctx);
1928 md5_process_bytes (a1buf, MD5_HASHLEN * 2, &ctx);
1929 md5_process_bytes (":", 1, &ctx);
1930 md5_process_bytes (nonce, strlen (nonce), &ctx);
1931 md5_process_bytes (":", 1, &ctx);
1932 md5_process_bytes (a2buf, MD5_HASHLEN * 2, &ctx);
1933 md5_finish_ctx (&ctx, hash);
1934 dump_hash (response_digest, hash);
/* Size the header.  The response digest is always 2 * MD5_HASHLEN hex
   characters, so its length is added as a constant instead of being
   measured.  */
1936 res = (char*) xmalloc (strlen (user)
1941 + 2 * MD5_HASHLEN /*strlen (response_digest)*/
1942 + (opaque ? strlen (opaque) : 0)
1944 sprintf (res, "Authorization: Digest \
1945 username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
1946 user, realm, nonce, path, response_digest);
/* P points at the current end of RES, so appending through P does not
   rescan the part already written.  */
1949 char *p = res + strlen (res);
1950 strcat (p, ", opaque=\"");
1954 strcat (res, "\r\n");
1958 #endif /* USE_DIGEST */
/* Nonzero if LINE begins, case-insensitively, with STRING_CONSTANT and
   the match is followed by whitespace or by the end of the string.
   STRING_CONSTANT must be a string literal, because its length is
   taken with sizeof.  LINE is evaluated more than once; it is
   parenthesized so that any pointer expression may be passed.  */
#define BEGINS_WITH(line, string_constant)				\
  (!strncasecmp ((line), (string_constant),				\
		 sizeof (string_constant) - 1)				\
   && (ISSPACE ((line)[sizeof (string_constant) - 1])			\
       || !(line)[sizeof (string_constant) - 1]))

/* Return nonzero if AU starts with the name of one of the schemes
   recognized here: `Basic', `Digest' or `NTLM'.  */
static int
known_authentication_scheme_p (const char *au)
{
  return BEGINS_WITH (au, "Basic")
    || BEGINS_WITH (au, "Digest")
    || BEGINS_WITH (au, "NTLM");
}
1976 /* Create the HTTP authorization request header. When the
1977 `WWW-Authenticate' response header is seen, according to the
1978 authorization scheme specified in that header (`Basic' and `Digest'
1979 are supported by the current implementation), produce an
1980 appropriate HTTP authorization request header. */
1982 create_authorization_line (const char *au, const char *user,
1983 const char *passwd, const char *method,
1986 char *wwwauth = NULL;
1988 if (!strncasecmp (au, "Basic", 5))
1989 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
1990 if (!strncasecmp (au, "NTLM", 4))
1991 wwwauth = basic_authentication_encode (user, passwd, "Authorization");
1993 else if (!strncasecmp (au, "Digest", 6))
1994 wwwauth = digest_authentication_encode (au, user, passwd, method, path);
1995 #endif /* USE_DIGEST */