X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=abaa4bdb34e3f31ddb344e0a8f234c07a46462c0;hb=9228f0bf53d3b42459daeb28372196a007de3014;hp=4313176f33c7bdc457f2fed4453fde90e5e8b41d;hpb=84396de6731454b46c70b1278a9270c24d6a6684;p=wget diff --git a/src/http.c b/src/http.c index 4313176f..abaa4bdb 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,5 @@ /* HTTP support. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 + Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -16,7 +16,17 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include @@ -44,12 +54,6 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ # endif #endif -#ifdef WINDOWS -# include -#else -# include /* for h_errno */ -#endif - #include "wget.h" #include "utils.h" #include "url.h" @@ -64,19 +68,18 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ # include "gen_sslfunc.h" #endif /* HAVE_SSL */ #include "cookies.h" +#ifdef USE_DIGEST +# include "gen-md5.h" +#endif extern char *version_string; #ifndef errno extern int errno; #endif -#ifndef h_errno -# ifndef __CYGWIN__ -extern int h_errno; -# endif -#endif static int cookies_loaded_p; +struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define HTTP_ACCEPT "*/*" @@ -178,6 +181,80 @@ parse_http_status_line (const char *line, const char **reason_phrase_ptr) return statcode; } +#define WMIN(x, y) ((x) > (y) ? (y) : (x)) + +/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly + PROMISED_SIZE bytes are sent over the wire -- if the file is + longer, read only that much; if the file is shorter, pad it with + zeros. */ + +static int +post_file (int sock, void *ssl, const char *file_name, long promised_size) +{ + static char chunk[8192]; + long written = 0; + int write_error; + FILE *fp; + + /* Only one of SOCK and SSL may be active at the same time. */ + assert (sock > -1 || ssl != NULL); + assert (sock == -1 || ssl == NULL); + + DEBUGP (("[writing POST file %s ... ", file_name)); + + fp = fopen (file_name, "rb"); + if (!fp) + goto pad; + while (written < promised_size) + { + int towrite; + int length = fread (chunk, 1, sizeof (chunk), fp); + if (length == 0) + break; + towrite = WMIN (promised_size - written, length); +#ifdef HAVE_SSL + if (ssl) + write_error = ssl_iwrite (ssl, chunk, towrite); + else +#endif + write_error = iwrite (sock, chunk, towrite); + if (write_error < 0) + { + fclose (fp); + return -1; + } + written += towrite; + } + fclose (fp); + + pad: + if (written < promised_size) + { + /* This highly unlikely case can happen only if the file has + shrunk under us. To uphold the promise that exactly + promised_size bytes would be delivered, pad the remaining + data with zeros. #### Should we abort instead? */ + DEBUGP (("padding %ld bytes ... ", promised_size - written)); + memset (chunk, '\0', sizeof (chunk)); + while (written < promised_size) + { + int towrite = WMIN (promised_size - written, sizeof (chunk)); +#ifdef HAVE_SSL + if (ssl) + write_error = ssl_iwrite (ssl, chunk, towrite); + else +#endif + write_error = iwrite (sock, chunk, towrite); + if (write_error < 0) + return -1; + written += towrite; + } + } + assert (written == promised_size); + DEBUGP (("done]\n")); + return 0; +} + /* Functions to be used as arguments to header_process(): */ struct http_process_range_closure { @@ -202,6 +279,10 @@ http_process_range (const char *hdr, void *arg) if (!strncasecmp (hdr, "bytes", 5)) { hdr += 5; + /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the + HTTP spec. */ + if (*hdr == ':') + ++hdr; hdr += skip_lws (hdr); if (!*hdr) return 0; @@ -264,6 +345,22 @@ http_process_connection (const char *hdr, void *arg) *flag = 1; return 1; } + +/* Commit the cookie to the cookie jar. */ + +int +http_process_set_cookie (const char *hdr, void *arg) +{ + struct url *u = (struct url *)arg; + + /* The jar should have been created by now. */ + assert (wget_cookie_jar != NULL); + + cookie_jar_process_set_cookie (wget_cookie_jar, u->host, u->port, u->path, + hdr); + return 1; +} + /* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to @@ -317,13 +414,15 @@ invalidate_persistent (void) If a previous connection was persistent, it is closed. */ -static void -register_persistent (const char *host, unsigned short port, int fd #ifdef HAVE_SSL - , SSL *ssl -#endif - ) +static void +register_persistent (const char *host, unsigned short port, int fd, SSL *ssl) +{ +#else +static void +register_persistent (const char *host, unsigned short port, int fd) { +#endif if (pc_active_p) { if (pc_last_fd == fd) @@ -367,16 +466,27 @@ register_persistent (const char *host, unsigned short port, int fd DEBUGP (("Registered fd %d for persistent reuse.\n", fd)); } +#ifdef HAVE_SSL +# define SHUTDOWN_SSL(ssl) do { \ + if (ssl) \ + shutdown_ssl (ssl); \ +} while (0) +#else +# define SHUTDOWN_SSL(ssl) +#endif + /* Return non-zero if a persistent connection is available for connecting to HOST:PORT. */ -static int -persistent_available_p (const char *host, unsigned short port #ifdef HAVE_SSL - , int ssl -#endif - ) +static int +persistent_available_p (const char *host, unsigned short port, int ssl) +{ +#else +static int +persistent_available_p (const char *host, unsigned short port) { +#endif int success; struct address_list *this_host_ip; @@ -422,21 +532,16 @@ persistent_available_p (const char *host, unsigned short port let's invalidate the persistent connection before returning 0. */ CLOSE (pc_last_fd); +#ifdef HAVE_SSL + SHUTDOWN_SSL (pc_last_ssl); + pc_last_ssl = NULL; +#endif invalidate_persistent (); return 0; } return 1; } -#ifdef HAVE_SSL -# define SHUTDOWN_SSL(ssl) do { \ - if (ssl) \ - shutdown_ssl (ssl); \ -} while (0) -#else -# define SHUTDOWN_SSL(ssl) -#endif - /* The idea behind these two CLOSE macros is to distinguish between two cases: one when the job we've been doing is finished, and we want to close the connection and leave, and two when something is @@ -479,7 +584,7 @@ struct http_stat char *remote_time; /* remote time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ - long dltime; /* time of the download */ + double dltime; /* time of the download in msecs */ int no_truncate; /* whether truncating the file is forbidden. */ const char *referer; /* value of the referer header. */ @@ -506,7 +611,7 @@ static char *basic_authentication_encode PARAMS ((const char *, const char *, const char *)); static int known_authentication_scheme_p PARAMS ((const char *)); -time_t http_atotm PARAMS ((char *)); +time_t http_atotm PARAMS ((const char *)); #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ @@ -536,7 +641,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) char *all_headers; char *port_maybe; char *request_keep_alive; - int sock, hcount, num_written, all_length, statcode; + int sock, hcount, all_length, statcode; + int write_error; long contlen, contrange; struct url *conn; FILE *fp; @@ -545,7 +651,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) #ifdef HAVE_SSL static SSL_CTX *ssl_ctx = NULL; SSL *ssl = NULL; -#endif /* HAVE_SSL */ +#endif char *cookies = NULL; /* Whether this connection will be kept alive after the HTTP request @@ -559,11 +665,20 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Whether keep-alive should be inhibited. */ int inhibit_keep_alive; + /* Whether we need to print the host header with braces around host, + e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual + "Host: symbolic-name:1234". */ + int squares_around_host = 0; + + /* Headers sent when using POST. */ + char *post_content_type, *post_content_length; + long post_data_size = 0; + #ifdef HAVE_SSL /* initialize ssl_ctx on first run */ if (!ssl_ctx) { - err = init_ssl (&ssl_ctx); + uerr_t err = init_ssl (&ssl_ctx); if (err != 0) { switch (err) @@ -615,6 +730,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) keep_alive = 0; http_keep_alive_1 = http_keep_alive_2 = 0; + post_content_type = NULL; + post_content_length = NULL; + /* Initialize certain elements of struct http_stat. */ hs->len = 0L; hs->contlen = -1; @@ -647,7 +765,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) address_list_release (al); if (sock < 0) - return errno == ECONNREFUSED ? CONREFUSED : CONERROR; + return CONNECT_ERROR (errno); #ifdef HAVE_SSL if (conn->scheme == SCHEME_HTTPS) @@ -674,7 +792,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) DEBUGP (("Reusing fd %d.\n", sock)); } - command = (*dt & HEAD_ONLY) ? "HEAD" : "GET"; + if (*dt & HEAD_ONLY) + command = "HEAD"; + else if (opt.post_file_name || opt.post_data) + command = "POST"; + else + command = "GET"; referer = NULL; if (hs->referer) @@ -745,8 +868,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } else { + /* Use the full path, i.e. one that includes the leading + slash and the query string, but is independent of proxy + setting. */ + char *pth = url_full_path (u); wwwauth = create_authorization_line (authenticate_h, user, passwd, - command, u->path); + command, pth); + xfree (pth); } } @@ -790,19 +918,46 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_keep_alive = NULL; if (opt.cookies) - cookies = build_cookies_request (u->host, u->port, u->path, + cookies = cookie_jar_generate_cookie_header (wget_cookie_jar, u->host, + u->port, u->path, #ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS + u->scheme == SCHEME_HTTPS #else - 0 + 0 #endif - ); + ); + + if (opt.post_data || opt.post_file_name) + { + post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n"; + if (opt.post_data) + post_data_size = strlen (opt.post_data); + else + { + post_data_size = file_size (opt.post_file_name); + if (post_data_size == -1) + { + logprintf (LOG_NOTQUIET, "POST data file missing: %s\n", + opt.post_file_name); + post_data_size = 0; + } + } + post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1); + sprintf (post_content_length, + "Content-Length: %ld\r\n", post_data_size); + } if (proxy) full_path = xstrdup (u->url); else + /* Use the full path, i.e. one that includes the leading slash and + the query string. E.g. if u->path is "foo/bar" and u->query is + "param=value", full_path will be "/foo/bar?param=value". */ full_path = url_full_path (u); + if (strchr (u->host, ':')) + squares_around_host = 1; + /* Allocate the memory for the request. */ request = (char *)alloca (strlen (command) + strlen (full_path) @@ -818,17 +973,22 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) + (proxyauth ? strlen (proxyauth) : 0) + (range ? strlen (range) : 0) + strlen (pragma_h) + + (post_content_type + ? strlen (post_content_type) : 0) + + (post_content_length + ? strlen (post_content_length) : 0) + (opt.user_header ? strlen (opt.user_header) : 0) + 64); /* Construct the request. */ sprintf (request, "\ %s %s HTTP/1.0\r\n\ User-Agent: %s\r\n\ -Host: %s%s\r\n\ +Host: %s%s%s%s\r\n\ Accept: %s\r\n\ -%s%s%s%s%s%s%s%s\r\n", +%s%s%s%s%s%s%s%s%s%s\r\n", command, full_path, - useragent, u->host, + useragent, + squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "", port_maybe ? port_maybe : "", HTTP_ACCEPT, request_keep_alive ? request_keep_alive : "", @@ -837,9 +997,11 @@ Accept: %s\r\n\ wwwauth ? wwwauth : "", proxyauth ? proxyauth : "", range ? range : "", - pragma_h, + pragma_h, + post_content_type ? post_content_type : "", + post_content_length ? post_content_length : "", opt.user_header ? opt.user_header : ""); - DEBUGP (("---request begin---\n%s---request end---\n", request)); + DEBUGP (("---request begin---\n%s", request)); /* Free the temporary memory. */ FREE_MAYBE (wwwauth); @@ -849,13 +1011,39 @@ Accept: %s\r\n\ /* Send the request to server. */ #ifdef HAVE_SSL - if (u->scheme == SCHEME_HTTPS) - num_written = ssl_iwrite (ssl, request, strlen (request)); + if (conn->scheme == SCHEME_HTTPS) + write_error = ssl_iwrite (ssl, request, strlen (request)); else -#endif /* HAVE_SSL */ - num_written = iwrite (sock, request, strlen (request)); +#endif + write_error = iwrite (sock, request, strlen (request)); + + if (write_error >= 0) + { + if (opt.post_data) + { + DEBUGP (("[POST data: %s]\n", opt.post_data)); +#ifdef HAVE_SSL + if (conn->scheme == SCHEME_HTTPS) + write_error = ssl_iwrite (ssl, opt.post_data, post_data_size); + else +#endif + write_error = iwrite (sock, opt.post_data, post_data_size); + } + else if (opt.post_file_name) + { +#ifdef HAVE_SSL + if (conn->scheme == SCHEME_HTTPS) + write_error = post_file (-1, ssl, opt.post_file_name, + post_data_size); + else +#endif + write_error = post_file (sock, NULL, opt.post_file_name, + post_data_size); + } + } + DEBUGP (("---request end---\n")); - if (num_written < 0) + if (write_error < 0) { logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), strerror (errno)); @@ -872,7 +1060,7 @@ Accept: %s\r\n\ /* Before reading anything, initialize the rbuf. */ rbuf_initialize (&rbuf, sock); #ifdef HAVE_SSL - if (u->scheme == SCHEME_HTTPS) + if (conn->scheme == SCHEME_HTTPS) rbuf.ssl = ssl; else rbuf.ssl = NULL; @@ -1008,7 +1196,7 @@ Accept: %s\r\n\ goto done_header; /* Try getting cookies. */ if (opt.cookies) - if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u)) + if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u)) goto done_header; /* Try getting www-authentication. */ if (!authenticate_h) @@ -1148,10 +1336,12 @@ Accept: %s\r\n\ } } - if (type && !strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S))) + /* If content-type is not given, assume text/html. This is because + of the multitude of broken CGI's that "forget" to generate the + content-type. */ + if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S))) *dt |= TEXTHTML; else - /* We don't assume text/html by default. */ *dt &= ~TEXTHTML; if (opt.html_extension && (*dt & TEXTHTML)) @@ -1207,6 +1397,8 @@ Accept: %s\r\n\ /* In case the caller inspects. */ hs->len = contlen; hs->res = 0; + /* Mark as successfully retrieved. */ + *dt |= RETROKF; FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there @@ -1326,8 +1518,12 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); #### A possible solution to this would be to remember the file position in the output document and to seek to that - position, instead of rewinding. */ - if (!hs->restval && global_download_count == 0) + position, instead of rewinding. + + We don't truncate stdout, since that breaks + "wget -O - [...] >> foo". + */ + if (!hs->restval && global_download_count == 0 && opt.dfp != stdout) { /* This will silently fail for streams that don't correspond to regular files, but that's OK. */ @@ -1349,6 +1545,11 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); (contlen != -1 ? contlen : 0), &rbuf, keep_alive, &hs->dltime); + if (hs->res >= 0) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + { /* Close or flush the file. We have to be careful to check for error here. Checking the result of fwrite() is not enough -- @@ -1362,7 +1563,6 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); hs->res = -2; } FREE_MAYBE (all_headers); - CLOSE_FINISH (sock); if (hs->res == -2) return FWRITEERR; return RETRFINISHED; @@ -1378,7 +1578,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, int use_ts, got_head = 0; /* time-stamping info */ char *filename_plus_orig_suffix; char *local_filename = NULL; - char *tms, *suf, *locf, *tmrate; + char *tms, *locf, *tmrate; uerr_t err; time_t tml = -1, tmr = -1; /* local and remote time-stamps */ long local_size = 0; /* the size of the local file */ @@ -1390,10 +1590,15 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* This used to be done in main(), but it's a better idea to do it here so that we don't go through the hoops if we're just using FTP or whatever. */ - if (opt.cookies && opt.cookies_input && !cookies_loaded_p) + if (opt.cookies) { - load_cookies (opt.cookies_input); - cookies_loaded_p = 1; + if (!wget_cookie_jar) + wget_cookie_jar = cookie_jar_new (); + if (opt.cookies_input && !cookies_loaded_p) + { + cookie_jar_load (wget_cookie_jar, opt.cookies_input); + cookies_loaded_p = 1; + } } *newloc = NULL; @@ -1409,12 +1614,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, hstat.local_file = local_file; else if (local_file) { - *local_file = url_filename (u); + *local_file = url_file_name (u); hstat.local_file = local_file; } else { - dummy = url_filename (u); + dummy = url_file_name (u); hstat.local_file = &dummy; } @@ -1438,9 +1643,8 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); *dt |= RETROKF; /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm", assume text/html. */ - if (((suf = suffix (*hstat.local_file)) != NULL) - && (!strcmp (suf, "html") || !strcmp (suf, "htm"))) + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (*hstat.local_file)) *dt |= TEXTHTML; FREE_MAYBE (dummy); @@ -1494,6 +1698,11 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); { use_ts = 1; tml = st.st_mtime; +#ifdef WINDOWS + /* Modification time granularity is 2 seconds for Windows, so + increase local time by 1 second for later comparison. */ + tml++; +#endif local_size = st.st_size; got_head = 0; } @@ -1983,7 +2192,7 @@ check_end (const char *p) it is not assigned to the FSF. So I stuck it with strptime. */ time_t -http_atotm (char *time_string) +http_atotm (const char *time_string) { /* NOTE: Solaris strptime man page claims that %n and %t match white space, but that's not universally available. Instead, we simply @@ -2156,7 +2365,7 @@ dump_hash (unsigned char *buf, const unsigned char *hash) /* Take the line apart to find the challenge, and compose a digest authorization header. See RFC2069 section 2.1.2. */ -char * +static char * digest_authentication_encode (const char *au, const char *user, const char *passwd, const char *method, const char *path) @@ -2229,37 +2438,37 @@ digest_authentication_encode (const char *au, const char *user, /* Calculate the digest value. */ { - MD5_CONTEXT_TYPE ctx; + ALLOCA_MD5_CONTEXT (ctx); unsigned char hash[MD5_HASHLEN]; unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1]; unsigned char response_digest[MD5_HASHLEN * 2 + 1]; /* A1BUF = H(user ":" realm ":" password) */ - MD5_INIT (&ctx); - MD5_UPDATE (user, strlen (user), &ctx); - MD5_UPDATE (":", 1, &ctx); - MD5_UPDATE (realm, strlen (realm), &ctx); - MD5_UPDATE (":", 1, &ctx); - MD5_UPDATE (passwd, strlen (passwd), &ctx); - MD5_FINISH (&ctx, hash); + gen_md5_init (ctx); + gen_md5_update ((unsigned char *)user, strlen (user), ctx); + gen_md5_update ((unsigned char *)":", 1, ctx); + gen_md5_update ((unsigned char *)realm, strlen (realm), ctx); + gen_md5_update ((unsigned char *)":", 1, ctx); + gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx); + gen_md5_finish (ctx, hash); dump_hash (a1buf, hash); /* A2BUF = H(method ":" path) */ - MD5_INIT (&ctx); - MD5_UPDATE (method, strlen (method), &ctx); - MD5_UPDATE (":", 1, &ctx); - MD5_UPDATE (path, strlen (path), &ctx); - MD5_FINISH (&ctx, hash); + gen_md5_init (ctx); + gen_md5_update ((unsigned char *)method, strlen (method), ctx); + gen_md5_update ((unsigned char *)":", 1, ctx); + gen_md5_update ((unsigned char *)path, strlen (path), ctx); + gen_md5_finish (ctx, hash); dump_hash (a2buf, hash); /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */ - MD5_INIT (&ctx); - MD5_UPDATE (a1buf, MD5_HASHLEN * 2, &ctx); - MD5_UPDATE (":", 1, &ctx); - MD5_UPDATE (nonce, strlen (nonce), &ctx); - MD5_UPDATE (":", 1, &ctx); - MD5_UPDATE (a2buf, MD5_HASHLEN * 2, &ctx); - MD5_FINISH (&ctx, hash); + gen_md5_init (ctx); + gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx); + gen_md5_update ((unsigned char *)":", 1, ctx); + gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx); + gen_md5_update ((unsigned char *)":", 1, ctx); + gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx); + gen_md5_finish (ctx, hash); dump_hash (response_digest, hash); res = (char*) xmalloc (strlen (user)