X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fconnect.c;h=1e4a44bae3d8d98e12b6efaffc195feb7e650ac0;hp=4b09c2da9739c05b973b40457831a3841ababf3d;hb=HEAD;hpb=133d69ff24cc562b68fef463780f1858c4028d99 diff --git a/src/connect.c b/src/connect.c index 4b09c2da..1e4a44ba 100644 --- a/src/connect.c +++ b/src/connect.c @@ -1,11 +1,13 @@ /* Establishing and handling network connections. - Copyright (C) 1995, 1996, 1997, 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software + Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,32 +16,35 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget. If not, see . -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. */ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +#include "wget.h" #include #include -#include -#ifdef HAVE_UNISTD_H -# include -#endif +#include #include +#include +#include + #ifndef WINDOWS -# include -# include +# ifdef __VMS +# include "vms_ip.h" +# else /* def __VMS */ +# include +# endif /* def __VMS [else] */ # include # ifndef __BEOS__ # include @@ -47,26 +52,27 @@ so, delete this exception statement from your version. */ #endif /* not WINDOWS */ #include -#ifdef HAVE_STRING_H -# include -#else -# include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_SYS_SELECT_H -# include -#endif /* HAVE_SYS_SELECT_H */ - -#include "wget.h" +#include +#include #include "utils.h" #include "host.h" #include "connect.h" #include "hash.h" -#ifndef errno -extern int errno; +/* Apparently needed for Interix: */ +#ifdef HAVE_STDINT_H +# include #endif - +/* Define sockaddr_storage where unavailable (presumably on IPv4-only + hosts). */ + +#ifndef ENABLE_IPV6 +# ifndef HAVE_STRUCT_SOCKADDR_STORAGE +# define sockaddr_storage sockaddr_in +# endif +#endif /* ENABLE_IPV6 */ + /* Fill SA as per the data in IP and PORT. SA shoult point to struct sockaddr_storage if ENABLE_IPV6 is defined, to struct sockaddr_in otherwise. */ @@ -74,27 +80,29 @@ extern int errno; static void sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port) { - switch (ip->type) + switch (ip->family) { - case IPV4_ADDRESS: + case AF_INET: { - struct sockaddr_in *sin = (struct sockaddr_in *)sa; - sin->sin_family = AF_INET; - sin->sin_port = htons (port); - sin->sin_addr = ADDRESS_IPV4_IN_ADDR (ip); - break; + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + xzero (*sin); + sin->sin_family = AF_INET; + sin->sin_port = htons (port); + sin->sin_addr = ip->data.d4; + break; } #ifdef ENABLE_IPV6 - case IPV6_ADDRESS: + case AF_INET6: { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = htons (port); - sin6->sin6_addr = ADDRESS_IPV6_IN6_ADDR (ip); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + xzero (*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons (port); + sin6->sin6_addr = ip->data.d6; #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID - sin6->sin6_scope_id = ADDRESS_IPV6_SCOPE (ip); + sin6->sin6_scope_id = ip->ipv6_scope; #endif - break; + break; } #endif /* ENABLE_IPV6 */ default: @@ -113,31 +121,31 @@ sockaddr_get_data (const struct sockaddr *sa, ip_address *ip, int *port) { case AF_INET: { - struct sockaddr_in *sin = (struct sockaddr_in *)sa; - if (ip) - { - ip->type = IPV4_ADDRESS; - ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr; - } - if (port) - *port = ntohs (sin->sin_port); - break; + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + if (ip) + { + ip->family = AF_INET; + ip->data.d4 = sin->sin_addr; + } + if (port) + *port = ntohs (sin->sin_port); + break; } #ifdef ENABLE_IPV6 case AF_INET6: { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; - if (ip) - { - ip->type = IPV6_ADDRESS; - ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + if (ip) + { + ip->family = AF_INET6; + ip->data.d6 = sin6->sin6_addr; #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID - ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id; + ip->ipv6_scope = sin6->sin6_scope_id; #endif - } - if (port) - *port = ntohs (sin6->sin6_port); - break; + } + if (port) + *port = ntohs (sin6->sin6_port); + break; } #endif default: @@ -161,36 +169,41 @@ sockaddr_size (const struct sockaddr *sa) #endif default: abort (); - return 0; /* so the compiler shuts up. */ } } -static int +/* Resolve the bind address specified via --bind-address and store it + to SA. The resolved value is stored in a static variable and + reused after the first invocation of this function. + + Returns true on success, false on failure. */ + +static bool resolve_bind_address (struct sockaddr *sa) { struct address_list *al; /* Make sure this is called only once. opt.bind_address doesn't change during a Wget run. */ - static int called, should_bind; + static bool called, should_bind; static ip_address ip; if (called) { if (should_bind) - sockaddr_set_data (sa, &ip, 0); + sockaddr_set_data (sa, &ip, 0); return should_bind; } - called = 1; + called = true; al = lookup_host (opt.bind_address, LH_BIND | LH_SILENT); if (!al) { /* #### We should be able to print the error message here. */ logprintf (LOG_NOTQUIET, - _("%s: unable to resolve bind address `%s'; disabling bind.\n"), - exec_name, opt.bind_address); - should_bind = 0; - return 0; + _("%s: unable to resolve bind address %s; disabling bind.\n"), + exec_name, quote (opt.bind_address)); + should_bind = false; + return false; } /* Pick the first address in the list and use it as bind address. @@ -200,8 +213,8 @@ resolve_bind_address (struct sockaddr *sa) address_list_release (al); sockaddr_set_data (sa, &ip, 0); - should_bind = 1; - return 1; + should_bind = true; + return true; } struct cwt_context { @@ -224,7 +237,7 @@ connect_with_timeout_callback (void *arg) static int connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen, - double timeout) + double timeout) { struct cwt_context ctx; ctx.fd = fd; @@ -241,25 +254,51 @@ connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen, return ctx.result; } -/* Connect to a remote endpoint whose IP address is known. */ +/* Connect via TCP to the specified address and port. + + If PRINT is non-NULL, it is the host name to print that we're + connecting to. */ int connect_to_ip (const ip_address *ip, int port, const char *print) { struct sockaddr_storage ss; struct sockaddr *sa = (struct sockaddr *)&ss; - int sock = -1; + int sock; /* If PRINT is non-NULL, print the "Connecting to..." line, with PRINT being the host name we're connecting to. */ if (print) { - const char *txt_addr = pretty_print_address (ip); - if (print && 0 != strcmp (print, txt_addr)) - logprintf (LOG_VERBOSE, - _("Connecting to %s|%s|:%d... "), print, txt_addr, port); + const char *txt_addr = print_address (ip); + if (0 != strcmp (print, txt_addr)) + { + char *str = NULL, *name; + + if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL) + { + int len = strlen (print) + strlen (name) + 4; + str = xmalloc (len); + snprintf (str, len, "%s (%s)", name, print); + str[len-1] = '\0'; + xfree (name); + } + + logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "), + str ? str : escnonprint_uri (print), txt_addr, port); + + if (str) + xfree (str); + } else - logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port); + { + if (ip->family == AF_INET) + logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port); +#ifdef ENABLE_IPV6 + else if (ip->family == AF_INET6) + logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port); +#endif + } } /* Store the sockaddr info to SA. */ @@ -270,6 +309,17 @@ connect_to_ip (const ip_address *ip, int port, const char *print) if (sock < 0) goto err; +#if defined(ENABLE_IPV6) && defined(IPV6_V6ONLY) + if (opt.ipv6_only) { + int on = 1; + /* In case of error, we will go on anyway... */ + int err = setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof (on)); + IF_DEBUG + if (err < 0) + DEBUGP (("Failed setting IPV6_V6ONLY: %s", strerror (errno))); + } +#endif + /* For very small rate limits, set the buffer size (and hence, hopefully, the kernel's TCP window size) to the per-second limit. That way we should never have to sleep for more than 1s between @@ -278,31 +328,31 @@ connect_to_ip (const ip_address *ip, int port, const char *print) { int bufsize = opt.limit_rate; if (bufsize < 512) - bufsize = 512; /* avoid pathologically small values */ + bufsize = 512; /* avoid pathologically small values */ #ifdef SO_RCVBUF setsockopt (sock, SOL_SOCKET, SO_RCVBUF, - (void *)&bufsize, (socklen_t)sizeof (bufsize)); + (void *)&bufsize, (socklen_t)sizeof (bufsize)); #endif /* When we add limit_rate support for writing, which is useful - for POST, we should also set SO_SNDBUF here. */ + for POST, we should also set SO_SNDBUF here. */ } if (opt.bind_address) { /* Bind the client side of the socket to the requested - address. */ + address. */ struct sockaddr_storage bind_ss; struct sockaddr *bind_sa = (struct sockaddr *)&bind_ss; if (resolve_bind_address (bind_sa)) - { + { if (bind (sock, bind_sa, sockaddr_size (bind_sa)) < 0) - goto err; - } + goto err; + } } /* Connect the socket to the remote endpoint. */ if (connect_with_timeout (sock, sa, sockaddr_size (sa), - opt.connect_timeout) < 0) + opt.connect_timeout) < 0) goto err; /* Success. */ @@ -318,28 +368,36 @@ connect_to_ip (const ip_address *ip, int port, const char *print) logprintf. */ int save_errno = errno; if (sock >= 0) - xclose (sock); + fd_close (sock); if (print) - logprintf (LOG_VERBOSE, "failed: %s.\n", strerror (errno)); + logprintf (LOG_VERBOSE, _("failed: %s.\n"), strerror (errno)); errno = save_errno; return -1; } } -/* Connect to a remote endpoint specified by host name. */ +/* Connect via TCP to a remote host on the specified port. + + HOST is resolved as an Internet host name. If HOST resolves to + more than one IP address, they are tried in the order returned by + DNS until connecting to one of them succeeds. */ int connect_to_host (const char *host, int port) { int i, start, end; - struct address_list *al; - int lh_flags = 0; - int sock = -1; + int sock; - again: - al = lookup_host (host, lh_flags); + struct address_list *al = lookup_host (host, 0); + + retry: if (!al) - return E_HOST; + { + logprintf (LOG_NOTQUIET, + _("%s: unable to resolve host address %s\n"), + exec_name, quote (host)); + return E_HOST; + } address_list_get_bounds (al, &start, &end); for (i = start; i < end; i++) @@ -347,64 +405,34 @@ connect_to_host (const char *host, int port) const ip_address *ip = address_list_address_at (al, i); sock = connect_to_ip (ip, port, host); if (sock >= 0) - /* Success. */ - break; - - address_list_set_faulty (al, i); + { + /* Success. */ + address_list_set_connected (al); + address_list_release (al); + return sock; + } /* The attempt to connect has failed. Continue with the loop - and try next address. */ - } - address_list_release (al); - - if (sock >= 0) - /* Mark a successful connection to one of the addresses. */ - address_list_set_connected (al); + and try next address. */ - if (sock < 0 && address_list_connected_p (al)) - { - /* We are unable to connect to any of HOST's addresses, although - we were previously able to connect to HOST. That might - indicate that HOST is under dynamic DNS and the addresses - we're connecting to have expired. Resolve it again. */ - lh_flags |= LH_REFRESH; - goto again; + address_list_set_faulty (al, i); } - return sock; -} - -int -test_socket_open (int sock) -{ -#ifdef HAVE_SELECT - fd_set check_set; - struct timeval to; - - /* Check if we still have a valid (non-EOF) connection. From Andrew - * Maholski's code in the Unix Socket FAQ. */ - - FD_ZERO (&check_set); - FD_SET (sock, &check_set); - - /* Wait one microsecond */ - to.tv_sec = 0; - to.tv_usec = 1; + /* Failed to connect to any of the addresses in AL. */ - /* If we get a timeout, then that means still connected */ - if (select (sock + 1, &check_set, NULL, NULL, &to) == 0) + if (address_list_connected_p (al)) { - /* Connection is valid (not EOF), so continue */ - return 1; + /* We connected to AL before, but cannot do so now. That might + indicate that our DNS cache entry for HOST has expired. */ + address_list_release (al); + al = lookup_host (host, LH_REFRESH); + goto retry; } - else - return 0; -#else - /* Without select, it's hard to know for sure. */ - return 1; -#endif -} + address_list_release (al); + return -1; +} + /* Create a socket, bind it to local interface BIND_ADDRESS on port *PORT, set up a listen backlog, and return the resulting socket, or -1 in case of error. @@ -422,7 +450,6 @@ int bind_local (const ip_address *bind_address, int *port) { int sock; - int family = AF_INET; struct sockaddr_storage ss; struct sockaddr *sa = (struct sockaddr *)&ss; @@ -431,12 +458,7 @@ bind_local (const ip_address *bind_address, int *port) void *setopt_ptr = (void *)&setopt_val; socklen_t setopt_size = sizeof (setopt_val); -#ifdef ENABLE_IPV6 - if (bind_address->type == IPV6_ADDRESS) - family = AF_INET6; -#endif - - sock = socket (family, SOCK_STREAM, 0); + sock = socket (bind_address->family, SOCK_STREAM, 0); if (sock < 0) return -1; @@ -444,18 +466,11 @@ bind_local (const ip_address *bind_address, int *port) setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size); #endif -#ifdef ENABLE_IPV6 -# ifdef HAVE_IPV6_V6ONLY - if (family == AF_INET6) - setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, setopt_ptr, setopt_size); -# endif -#endif - xzero (ss); sockaddr_set_data (sa, bind_address, *port); if (bind (sock, sa, sockaddr_size (sa)) < 0) { - xclose (sock); + fd_close (sock); return -1; } DEBUGP (("Local socket fd %d bound.\n", sock)); @@ -463,23 +478,23 @@ bind_local (const ip_address *bind_address, int *port) /* If *PORT is 0, find out which port we've bound to. */ if (*port == 0) { - socklen_t sa_len = sockaddr_size (sa); - if (getsockname (sock, sa, &sa_len) < 0) - { - /* If we can't find out the socket's local address ("name"), - something is seriously wrong with the socket, and it's - unusable for us anyway because we must know the chosen - port. */ - xclose (sock); - return -1; - } + socklen_t addrlen = sockaddr_size (sa); + if (getsockname (sock, sa, &addrlen) < 0) + { + /* If we can't find out the socket's local address ("name"), + something is seriously wrong with the socket, and it's + unusable for us anyway because we must know the chosen + port. */ + fd_close (sock); + return -1; + } sockaddr_get_data (sa, NULL, port); - DEBUGP (("binding to address %s using port %i.\n", - pretty_print_address (bind_address), *port)); + DEBUGP (("binding to address %s using port %i.\n", + print_address (bind_address), *port)); } if (listen (sock, 1) < 0) { - xclose (sock); + fd_close (sock); return -1; } return sock; @@ -506,36 +521,35 @@ accept_connection (int local_sock) struct sockaddr *sa = (struct sockaddr *)&ss; socklen_t addrlen = sizeof (ss); -#ifdef HAVE_SELECT if (opt.connect_timeout) { int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ); if (test == 0) - errno = ETIMEDOUT; + errno = ETIMEDOUT; if (test <= 0) - return -1; + return -1; } -#endif sock = accept (local_sock, sa, &addrlen); DEBUGP (("Accepted client at socket %d.\n", sock)); return sock; } /* Get the IP address associated with the connection on FD and store - it to IP. Return 1 on success, 0 otherwise. + it to IP. Return true on success, false otherwise. If ENDPOINT is ENDPOINT_LOCAL, it returns the address of the local (client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it returns the address of the remote (peer's) side of the socket. */ -int +bool socket_ip_address (int sock, ip_address *ip, int endpoint) { struct sockaddr_storage storage; - struct sockaddr *sockaddr = (struct sockaddr *)&storage; + struct sockaddr *sockaddr = (struct sockaddr *) &storage; socklen_t addrlen = sizeof (storage); int ret; + memset (sockaddr, 0, addrlen); if (endpoint == ENDPOINT_LOCAL) ret = getsockname (sock, sockaddr, &addrlen); else if (endpoint == ENDPOINT_PEER) @@ -543,57 +557,85 @@ socket_ip_address (int sock, ip_address *ip, int endpoint) else abort (); if (ret < 0) - return 0; + return false; + memset(ip, 0, sizeof(ip_address)); + ip->family = sockaddr->sa_family; switch (sockaddr->sa_family) { #ifdef ENABLE_IPV6 case AF_INET6: { - struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&storage; - ip->type = IPV6_ADDRESS; - ADDRESS_IPV6_IN6_ADDR (ip) = sa6->sin6_addr; + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&storage; + ip->data.d6 = sa6->sin6_addr; #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID - ADDRESS_IPV6_SCOPE (ip) = sa6->sin6_scope_id; + ip->ipv6_scope = sa6->sin6_scope_id; #endif - DEBUGP (("conaddr is: %s\n", pretty_print_address (ip))); - return 1; + DEBUGP (("conaddr is: %s\n", print_address (ip))); + return true; } #endif case AF_INET: { - struct sockaddr_in *sa = (struct sockaddr_in *)&storage; - ip->type = IPV4_ADDRESS; - ADDRESS_IPV4_IN_ADDR (ip) = sa->sin_addr; - DEBUGP (("conaddr is: %s\n", pretty_print_address (ip))); - return 1; + struct sockaddr_in *sa = (struct sockaddr_in *)&storage; + ip->data.d4 = sa->sin_addr; + DEBUGP (("conaddr is: %s\n", print_address (ip))); + return true; } default: abort (); } - - return 0; } -/* Return non-zero if the error from the connect code can be - considered retryable. Wget normally retries after errors, but the - exception are the "unsupported protocol" type errors (possible on - IPv4/IPv6 dual family systems) and "connection refused". */ +/* Get the socket family of connection on FD and store + Return family type on success, -1 otherwise. + + If ENDPOINT is ENDPOINT_LOCAL, it returns the sock family of the local + (client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it + returns the sock family of the remote (peer's) side of the socket. */ int +socket_family (int sock, int endpoint) +{ + struct sockaddr_storage storage; + struct sockaddr *sockaddr = (struct sockaddr *) &storage; + socklen_t addrlen = sizeof (storage); + int ret; + + memset (sockaddr, 0, addrlen); + + if (endpoint == ENDPOINT_LOCAL) + ret = getsockname (sock, sockaddr, &addrlen); + else if (endpoint == ENDPOINT_PEER) + ret = getpeername (sock, sockaddr, &addrlen); + else + abort (); + + if (ret < 0) + return -1; + + return sockaddr->sa_family; +} + +/* Return true if the error from the connect code can be considered + retryable. Wget normally retries after errors, but the exception + are the "unsupported protocol" type errors (possible on IPv4/IPv6 + dual family systems) and "connection refused". */ + +bool retryable_socket_connect_error (int err) { /* Have to guard against some of these values not being defined. Cannot use a switch statement because some of the values might be equal. */ - if (0 + if (false #ifdef EAFNOSUPPORT || err == EAFNOSUPPORT #endif #ifdef EPFNOSUPPORT || err == EPFNOSUPPORT #endif -#ifdef ESOCKTNOSUPPORT /* no, "sockt" is not a typo! */ +#ifdef ESOCKTNOSUPPORT /* no, "sockt" is not a typo! */ || err == ESOCKTNOSUPPORT #endif #ifdef EPROTONOSUPPORT @@ -603,44 +645,33 @@ retryable_socket_connect_error (int err) || err == ENOPROTOOPT #endif /* Apparently, older versions of Linux and BSD used EINVAL - instead of EAFNOSUPPORT and such. */ + instead of EAFNOSUPPORT and such. */ || err == EINVAL ) - return 0; + return false; - if (err == ECONNREFUSED && !opt.retry_connrefused) - return 0; - - return 1; -} + if (!opt.retry_connrefused) + if (err == ECONNREFUSED +#ifdef ENETUNREACH + || err == ENETUNREACH /* network is unreachable */ +#endif +#ifdef EHOSTUNREACH + || err == EHOSTUNREACH /* host is unreachable */ +#endif + ) + return false; -int -socket_has_inet6 (void) -{ - static int supported = -1; - if (supported == -1) - { - int sock = socket (AF_INET6, SOCK_STREAM, 0); - if (sock < 0) - supported = 0; - else - { - xclose (sock); - supported = 1; - } - } - return supported; + return true; } -#ifdef HAVE_SELECT - -/* Wait for file descriptor FD to be readable or writable or both, - timing out after MAXTIME seconds. Returns 1 if FD is available, 0 - for timeout and -1 for error. The argument WAIT_FOR can be a - combination of WAIT_READ and WAIT_WRITE. +/* Wait for a single descriptor to become available, timing out after + MAXTIME seconds. Returns 1 if FD is available, 0 for timeout and + -1 for error. The argument WAIT_FOR can be a combination of + WAIT_FOR_READ and WAIT_FOR_WRITE. This is a mere convenience wrapper around the select call, and - should be taken as such. */ + should be taken as such (for example, it doesn't implement Wget's + 0-timeout-means-no-timeout semantics.) */ int select_fd (int fd, double maxtime, int wait_for) @@ -658,29 +689,68 @@ select_fd (int fd, double maxtime, int wait_for) wr = &fdset; tmout.tv_sec = (long) maxtime; - tmout.tv_usec = 1000000L * (maxtime - (long) maxtime); + tmout.tv_usec = 1000000 * (maxtime - (long) maxtime); do + { result = select (fd + 1, rd, wr, NULL, &tmout); +#ifdef WINDOWS + /* gnulib select() converts blocking sockets to nonblocking in windows. + wget uses blocking sockets so we must convert them back to blocking. */ + set_windows_fd_as_blocking_socket (fd); +#endif + } while (result < 0 && errno == EINTR); return result; } -#endif /* HAVE_SELECT */ - -/* Basic socket operations, mostly EINTR wrappers. */ +/* Return true iff the connection to the remote site established + through SOCK is still open. + + Specifically, this function returns true if SOCK is not ready for + reading. This is because, when the connection closes, the socket + is ready for reading because EOF is about to be delivered. A side + effect of this method is that sockets that have pending data are + considered non-open. This is actually a good thing for callers of + this function, where such pending data can only be unwanted + leftover from a previous request. */ + +bool +test_socket_open (int sock) +{ + fd_set check_set; + struct timeval to; + int ret = 0; + + /* Check if we still have a valid (non-EOF) connection. From Andrew + * Maholski's code in the Unix Socket FAQ. */ + + FD_ZERO (&check_set); + FD_SET (sock, &check_set); + + /* Wait one microsecond */ + to.tv_sec = 0; + to.tv_usec = 1; + ret = select (sock + 1, &check_set, NULL, NULL, &to); #ifdef WINDOWS -# define read(fd, buf, cnt) recv (fd, buf, cnt, 0) -# define write(fd, buf, cnt) send (fd, buf, cnt, 0) -# define close(fd) closesocket (fd) +/* gnulib select() converts blocking sockets to nonblocking in windows. +wget uses blocking sockets so we must convert them back to blocking +*/ + set_windows_fd_as_blocking_socket ( sock ); #endif -#ifdef __BEOS__ -# define read(fd, buf, cnt) recv (fd, buf, cnt, 0) -# define write(fd, buf, cnt) send (fd, buf, cnt, 0) -#endif + if ( !ret ) + /* We got a timeout, it means we're still connected. */ + return true; + else + /* Read now would not wait, it means we have either pending data + or EOF/error. */ + return false; +} + +/* Basic socket operations, mostly EINTR wrappers. */ static int sock_read (int fd, char *buf, int bufsize) @@ -695,7 +765,7 @@ sock_read (int fd, char *buf, int bufsize) static int sock_write (int fd, char *buf, int bufsize) { - int res = 0; + int res; do res = write (fd, buf, bufsize); while (res == -1 && errno == EINTR); @@ -705,11 +775,17 @@ sock_write (int fd, char *buf, int bufsize) static int sock_poll (int fd, double timeout, int wait_for) { -#ifdef HAVE_SELECT return select_fd (fd, timeout, wait_for); -#else - return 1; -#endif +} + +static int +sock_peek (int fd, char *buf, int bufsize) +{ + int res; + do + res = recv (fd, buf, bufsize, MSG_PEEK); + while (res == -1 && errno == EINTR); + return res; } static void @@ -727,17 +803,14 @@ sock_close (int fd) that are not mere file descriptors under the hood, such as SSL sockets. - That way the user code can call xread(fd, ...) and we'll run read + That way the user code can call fd_read(fd, ...) and we'll run read or SSL_read or whatever is necessary. */ static struct hash_table *transport_map; -static int transport_map_modified_tick; +static unsigned int transport_map_modified_tick; struct transport_info { - xreader_t reader; - xwriter_t writer; - xpoller_t poller; - xcloser_t closer; + struct transport_implementation *imp; void *ctx; }; @@ -749,29 +822,36 @@ struct transport_info { call getpeername, etc. */ void -register_transport (int fd, xreader_t reader, xwriter_t writer, - xpoller_t poller, xcloser_t closer, void *ctx) +fd_register_transport (int fd, struct transport_implementation *imp, void *ctx) { struct transport_info *info; /* The file descriptor must be non-negative to be registered. - Negative values are ignored by xclose(), and -1 cannot be used as + Negative values are ignored by fd_close(), and -1 cannot be used as hash key. */ assert (fd >= 0); info = xnew (struct transport_info); - info->reader = reader; - info->writer = writer; - info->poller = poller; - info->closer = closer; + info->imp = imp; info->ctx = ctx; if (!transport_map) transport_map = hash_table_new (0, NULL, NULL); - hash_table_put (transport_map, (void *) fd, info); + hash_table_put (transport_map, (void *)(intptr_t) fd, info); ++transport_map_modified_tick; } -/* When xread/xwrite are called multiple times in a loop, they should +/* Return context of the transport registered with + fd_register_transport. This assumes fd_register_transport was + previously called on FD. */ + +void * +fd_transport_context (int fd) +{ + struct transport_info *info = hash_table_get (transport_map, (void *)(intptr_t) fd); + return info->ctx; +} + +/* When fd_read/fd_write are called multiple times in a loop, they should remember the INFO pointer instead of fetching it every time. It is not enough to compare FD to LAST_FD because FD might have been closed and reopened. modified_tick ensures that changes to @@ -780,118 +860,169 @@ register_transport (int fd, xreader_t reader, xwriter_t writer, This is a macro because we want the static storage variables to be per-function. */ -#define LAZY_RETRIEVE_INFO(info) do { \ - static struct transport_info *last_info; \ - static int last_fd = -1, last_tick; \ - if (!transport_map) \ - info = NULL; \ - else if (last_fd == fd && last_tick == transport_map_modified_tick) \ - info = last_info; \ - else \ - { \ - info = hash_table_get (transport_map, (void *) fd); \ - last_fd = fd; \ - last_info = info; \ - last_tick = transport_map_modified_tick; \ - } \ +#define LAZY_RETRIEVE_INFO(info) do { \ + static struct transport_info *last_info; \ + static int last_fd = -1; \ + static unsigned int last_tick; \ + if (!transport_map) \ + info = NULL; \ + else if (last_fd == fd && last_tick == transport_map_modified_tick) \ + info = last_info; \ + else \ + { \ + info = hash_table_get (transport_map, (void *)(intptr_t) fd); \ + last_fd = fd; \ + last_info = info; \ + last_tick = transport_map_modified_tick; \ + } \ } while (0) -/* Read no more than BUFSIZE bytes of data from FD, storing them to - BUF. If TIMEOUT is non-zero, the operation aborts if no data is - received after that many seconds. If TIMEOUT is -1, the value of - opt.timeout is used for TIMEOUT. */ - -int -xread (int fd, char *buf, int bufsize, double timeout) +static bool +poll_internal (int fd, struct transport_info *info, int wf, double timeout) { - struct transport_info *info; - LAZY_RETRIEVE_INFO (info); if (timeout == -1) timeout = opt.read_timeout; if (timeout) { int test; - if (info && info->poller) - test = info->poller (fd, timeout, WAIT_FOR_READ, info->ctx); + if (info && info->imp->poller) + test = info->imp->poller (fd, timeout, wf, info->ctx); else - test = sock_poll (fd, timeout, WAIT_FOR_READ); + test = sock_poll (fd, timeout, wf); if (test == 0) - errno = ETIMEDOUT; + errno = ETIMEDOUT; if (test <= 0) - return -1; + return false; } - if (info && info->reader) - return info->reader (fd, buf, bufsize, info->ctx); + return true; +} + +/* Read no more than BUFSIZE bytes of data from FD, storing them to + BUF. If TIMEOUT is non-zero, the operation aborts if no data is + received after that many seconds. If TIMEOUT is -1, the value of + opt.timeout is used for TIMEOUT. */ + +int +fd_read (int fd, char *buf, int bufsize, double timeout) +{ + struct transport_info *info; + LAZY_RETRIEVE_INFO (info); + if (!poll_internal (fd, info, WAIT_FOR_READ, timeout)) + return -1; + if (info && info->imp->reader) + return info->imp->reader (fd, buf, bufsize, info->ctx); else return sock_read (fd, buf, bufsize); } +/* Like fd_read, except it provides a "preview" of the data that will + be read by subsequent calls to fd_read. Specifically, it copies no + more than BUFSIZE bytes of the currently available data to BUF and + returns the number of bytes copied. Return values and timeout + semantics are the same as those of fd_read. + + CAVEAT: Do not assume that the first subsequent call to fd_read + will retrieve the same amount of data. Reading can return more or + less data, depending on the TCP implementation and other + circumstances. However, barring an error, it can be expected that + all the peeked data will eventually be read by fd_read. */ + +int +fd_peek (int fd, char *buf, int bufsize, double timeout) +{ + struct transport_info *info; + LAZY_RETRIEVE_INFO (info); + if (!poll_internal (fd, info, WAIT_FOR_READ, timeout)) + return -1; + if (info && info->imp->peeker) + return info->imp->peeker (fd, buf, bufsize, info->ctx); + else + return sock_peek (fd, buf, bufsize); +} + /* Write the entire contents of BUF to FD. If TIMEOUT is non-zero, the operation aborts if no data is received after that many seconds. If TIMEOUT is -1, the value of opt.timeout is used for TIMEOUT. */ int -xwrite (int fd, char *buf, int bufsize, double timeout) +fd_write (int fd, char *buf, int bufsize, double timeout) { int res; struct transport_info *info; LAZY_RETRIEVE_INFO (info); - if (timeout == -1) - timeout = opt.read_timeout; /* `write' may write less than LEN bytes, thus the loop keeps trying it until all was written, or an error occurred. */ res = 0; while (bufsize > 0) { - if (timeout) - { - int test; - if (info && info->poller) - test = info->poller (fd, timeout, WAIT_FOR_WRITE, info->ctx); - else - test = sock_poll (fd, timeout, WAIT_FOR_WRITE); - if (test == 0) - errno = ETIMEDOUT; - if (test <= 0) - return -1; - } - if (info && info->writer) - res = info->writer (fd, buf, bufsize, info->ctx); + if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout)) + return -1; + if (info && info->imp->writer) + res = info->imp->writer (fd, buf, bufsize, info->ctx); else - res = sock_write (fd, buf, bufsize); + res = sock_write (fd, buf, bufsize); if (res <= 0) - break; + break; buf += res; bufsize -= res; } return res; } +/* Report the most recent error(s) on FD. This should only be called + after fd_* functions, such as fd_read and fd_write, and only if + they return a negative result. For errors coming from other calls + such as setsockopt or fopen, strerror should continue to be + used. + + If the transport doesn't support error messages or doesn't supply + one, strerror(errno) is returned. The returned error message + should not be used after fd_close has been called. */ + +const char * +fd_errstr (int fd) +{ + /* Don't bother with LAZY_RETRIEVE_INFO, as this will only be called + in case of error, never in a tight loop. */ + struct transport_info *info = NULL; + if (transport_map) + info = hash_table_get (transport_map, (void *)(intptr_t) fd); + + if (info && info->imp->errstr) + { + const char *err = info->imp->errstr (fd, info->ctx); + if (err) + return err; + /* else, fall through and print the system error. */ + } + return strerror (errno); +} + /* Close the file descriptor FD. */ void -xclose (int fd) +fd_close (int fd) { struct transport_info *info; if (fd < 0) return; - /* Don't use LAZY_RETRIEVE_INFO because xclose() is only called once + /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once per socket, so that particular optimization wouldn't work. */ info = NULL; if (transport_map) - info = hash_table_get (transport_map, (void *) fd); + info = hash_table_get (transport_map, (void *)(intptr_t) fd); - if (info && info->closer) - info->closer (fd, info->ctx); + if (info && info->imp->closer) + info->imp->closer (fd, info->ctx); else sock_close (fd); if (info) { - hash_table_remove (transport_map, (void *) fd); + hash_table_remove (transport_map, (void *)(intptr_t) fd); xfree (info); ++transport_map_modified_tick; }