X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fconnect.c;h=eeb4eb288f9a9d7b5782d8a93b14c28a0486d445;hb=d9fea91a0a319e348adb504bd3edff148ff3d8a0;hp=c65868d89399f0c95dfa29f2c16e37a054bd82cb;hpb=2d00f882e033a97531ee3420d231efd84e0a28a4;p=wget diff --git a/src/connect.c b/src/connect.c index c65868d8..eeb4eb28 100644 --- a/src/connect.c +++ b/src/connect.c @@ -60,6 +60,7 @@ so, delete this exception statement from your version. */ #include "utils.h" #include "host.h" #include "connect.h" +#include "hash.h" #ifndef errno extern int errno; @@ -105,7 +106,7 @@ sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port) you're not interested in one or the other information, pass NULL as the pointer. */ -void +static void sockaddr_get_data (const struct sockaddr *sa, ip_address *ip, int *port) { switch (sa->sa_family) @@ -165,28 +166,41 @@ sockaddr_size (const struct sockaddr *sa) } static int -resolve_bind_address (const char *host, struct sockaddr *sa, int flags) +resolve_bind_address (struct sockaddr *sa) { struct address_list *al; - /* #### Shouldn't we do this only once? opt.bind_address won't - change during a Wget run! */ + /* Make sure this is called only once. opt.bind_address doesn't + change during a Wget run. */ + static int called, should_bind; + static ip_address ip; + if (called) + { + if (should_bind) + sockaddr_set_data (sa, &ip, 0); + return should_bind; + } + called = 1; - al = lookup_host (host, flags | LH_SILENT | LH_PASSIVE); - if (al == NULL) + al = lookup_host (opt.bind_address, LH_BIND | LH_SILENT); + if (!al) { - /* #### We should print the error message here. */ + /* #### We should be able to print the error message here. */ logprintf (LOG_NOTQUIET, _("%s: unable to resolve bind address `%s'; disabling bind.\n"), exec_name, opt.bind_address); + should_bind = 0; return 0; } /* Pick the first address in the list and use it as bind address. 
- Perhaps we should try multiple addresses, but I don't think - that's necessary in practice. */ - sockaddr_set_data (sa, address_list_address_at (al, 0), 0); + Perhaps we should try multiple addresses in succession, but I + don't think that's necessary in practice. */ + ip = *address_list_address_at (al, 0); address_list_release (al); + + sockaddr_set_data (sa, &ip, 0); + should_bind = 1; return 1; } @@ -227,7 +241,10 @@ connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen, return ctx.result; } -/* Connect to a remote endpoint whose IP address is known. */ +/* Connect via TCP to the specified address and port. + + If PRINT is non-NULL, it is the host name to print that we're + connecting to. */ int connect_to_ip (const ip_address *ip, int port, const char *print) @@ -279,7 +296,7 @@ connect_to_ip (const ip_address *ip, int port, const char *print) address. */ struct sockaddr_storage bind_ss; struct sockaddr *bind_sa = (struct sockaddr *)&bind_ss; - if (resolve_bind_address (opt.bind_address, bind_sa, 0)) + if (resolve_bind_address (bind_sa)) { if (bind (sock, bind_sa, sockaddr_size (bind_sa)) < 0) goto err; @@ -304,7 +321,7 @@ connect_to_ip (const ip_address *ip, int port, const char *print) logprintf. */ int save_errno = errno; if (sock >= 0) - CLOSE (sock); + fd_close (sock); if (print) logprintf (LOG_VERBOSE, "failed: %s.\n", strerror (errno)); errno = save_errno; @@ -312,17 +329,21 @@ connect_to_ip (const ip_address *ip, int port, const char *print) } } -/* Connect to a remote endpoint specified by host name. */ +/* Connect via TCP to a remote host on the specified port. + + HOST is resolved as an Internet host name. If HOST resolves to + more than one IP address, they are tried in the order returned by + DNS until connecting to one of them succeeds. 
*/ int connect_to_host (const char *host, int port) { int i, start, end; - struct address_list *al; - int sock = -1; + int sock; - again: - al = lookup_host (host, 0); + struct address_list *al = lookup_host (host, 0); + + retry: if (!al) return E_HOST; @@ -332,72 +353,54 @@ connect_to_host (const char *host, int port) const ip_address *ip = address_list_address_at (al, i); sock = connect_to_ip (ip, port, host); if (sock >= 0) - /* Success. */ - break; - - address_list_set_faulty (al, i); + { + /* Success. */ + address_list_set_connected (al); + address_list_release (al); + return sock; + } /* The attempt to connect has failed. Continue with the loop and try next address. */ - } - address_list_release (al); - if (sock < 0 && address_list_cached_p (al)) - { - /* We were unable to connect to any address in a list we've - obtained from cache. There is a possibility that the host is - under dynamic DNS and has changed its address. Resolve it - again. */ - forget_host_lookup (host); - goto again; + address_list_set_faulty (al, i); } - return sock; -} - -int -test_socket_open (int sock) -{ -#ifdef HAVE_SELECT - fd_set check_set; - struct timeval to; - - /* Check if we still have a valid (non-EOF) connection. From Andrew - * Maholski's code in the Unix Socket FAQ. */ + /* Failed to connect to any of the addresses in AL. */ - FD_ZERO (&check_set); - FD_SET (sock, &check_set); - - /* Wait one microsecond */ - to.tv_sec = 0; - to.tv_usec = 1; - - /* If we get a timeout, then that means still connected */ - if (select (sock + 1, &check_set, NULL, NULL, &to) == 0) + if (address_list_connected_p (al)) { - /* Connection is valid (not EOF), so continue */ - return 1; + /* We connected to AL before, but cannot do so now. That might + indicate that our DNS cache entry for HOST has expired. */ + address_list_release (al); + al = lookup_host (host, LH_REFRESH); + goto retry; } - else - return 0; -#else - /* Without select, it's hard to know for sure. 
*/ - return 1; -#endif + address_list_release (al); + + return -1; } + +/* Create a socket, bind it to local interface BIND_ADDRESS on port + *PORT, set up a listen backlog, and return the resulting socket, or + -1 in case of error. -/* Create a socket and bind it to PORT locally. Calling accept() on - such a socket waits for and accepts incoming TCP connections. The - resulting socket is stored to LOCAL_SOCK. */ + BIND_ADDRESS is the address of the interface to bind to. If it is + NULL, the socket is bound to the default address. PORT should + point to the port number that will be used for the binding. If + that number is 0, the system will choose a suitable port, and the + chosen value will be written to *PORT. -uerr_t -bindport (const ip_address *bind_address, int *port, int *local_sock) + Calling accept() on such a socket waits for and accepts incoming + TCP connections. */ + +int +bind_local (const ip_address *bind_address, int *port) { int sock; int family = AF_INET; struct sockaddr_storage ss; struct sockaddr *sa = (struct sockaddr *)&ss; - xzero (ss); /* For setting options with setsockopt. */ int setopt_val = 1; @@ -409,8 +412,9 @@ bindport (const ip_address *bind_address, int *port, int *local_sock) family = AF_INET6; #endif - if ((sock = socket (family, SOCK_STREAM, 0)) < 0) - return CONSOCKERR; + sock = socket (family, SOCK_STREAM, 0); + if (sock < 0) + return -1; #ifdef SO_REUSEADDR setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size); @@ -423,20 +427,27 @@ bindport (const ip_address *bind_address, int *port, int *local_sock) # endif #endif + xzero (ss); sockaddr_set_data (sa, bind_address, *port); if (bind (sock, sa, sockaddr_size (sa)) < 0) { - CLOSE (sock); - return BINDERR; + fd_close (sock); + return -1; } DEBUGP (("Local socket fd %d bound.\n", sock)); - if (!*port) + + /* If *PORT is 0, find out which port we've bound to. 
*/ + if (*port == 0) { socklen_t sa_len = sockaddr_size (sa); if (getsockname (sock, sa, &sa_len) < 0) { - CLOSE (sock); - return CONPORTERR; + /* If we can't find out the socket's local address ("name"), + something is seriously wrong with the socket, and it's + unusable for us anyway because we must know the chosen + port. */ + fd_close (sock); + return -1; } sockaddr_get_data (sa, NULL, port); DEBUGP (("binding to address %s using port %i.\n", @@ -444,86 +455,68 @@ bindport (const ip_address *bind_address, int *port, int *local_sock) } if (listen (sock, 1) < 0) { - CLOSE (sock); - return LISTENERR; + fd_close (sock); + return -1; } - *local_sock = sock; - return BINDOK; + return sock; } -#ifdef HAVE_SELECT -/* Wait for file descriptor FD to be readable or writable or both, - timing out after MAXTIME seconds. Returns 1 if FD is available, 0 - for timeout and -1 for error. The argument WHAT can be a - combination of WAIT_READ and WAIT_WRITE. - - This is a mere convenience wrapper around the select call, and - should be taken as such. */ +/* Like a call to accept(), but with the added check for timeout. -int -select_fd (int fd, double maxtime, int wait) -{ - fd_set fdset; - fd_set *rd = NULL, *wr = NULL; - struct timeval tmout; - int result; + In other words, accept a client connection on LOCAL_SOCK, and + return the new socket used for communication with the client. + LOCAL_SOCK should have been bound, e.g. using bind_local(). - FD_ZERO (&fdset); - FD_SET (fd, &fdset); - if (wait & WAIT_READ) - rd = &fdset; - if (wait & WAIT_WRITE) - wr = &fdset; - - tmout.tv_sec = (long)maxtime; - tmout.tv_usec = 1000000L * (maxtime - (long)maxtime); - - do - result = select (fd + 1, rd, wr, NULL, &tmout); - while (result < 0 && errno == EINTR); - - /* When we've timed out, set errno to ETIMEDOUT for the convenience - of the caller. 
*/ - if (result == 0) - errno = ETIMEDOUT; - - return result; -} -#endif /* HAVE_SELECT */ - -/* Accept a connection on LOCAL_SOCK, and store the new socket to - *SOCK. It blocks the caller until a connection is established. If - no connection is established for opt.connect_timeout seconds, the + The caller is blocked until a connection is established. If no + connection is established for opt.connect_timeout seconds, the function exits with an error status. */ -uerr_t -acceptport (int local_sock, int *sock) +int +accept_connection (int local_sock) { + int sock; + + /* We don't need the values provided by accept, but accept + apparently requires them to be present. */ struct sockaddr_storage ss; struct sockaddr *sa = (struct sockaddr *)&ss; socklen_t addrlen = sizeof (ss); -#ifdef HAVE_SELECT if (opt.connect_timeout) - if (select_fd (local_sock, opt.connect_timeout, WAIT_READ) <= 0) - return ACCEPTERR; -#endif - if ((*sock = accept (local_sock, sa, &addrlen)) < 0) - return ACCEPTERR; - DEBUGP (("Created socket fd %d.\n", *sock)); - return ACCEPTOK; + { + int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ); + if (test == 0) + errno = ETIMEDOUT; + if (test <= 0) + return -1; + } + sock = accept (local_sock, sa, &addrlen); + DEBUGP (("Accepted client at socket %d.\n", sock)); + return sock; } -/* Return the local IP address associated with the connection on FD. */ +/* Get the IP address associated with the connection on FD and store + it to IP. Return 1 on success, 0 otherwise. + + If ENDPOINT is ENDPOINT_LOCAL, it returns the address of the local + (client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it + returns the address of the remote (peer's) side of the socket. 
*/ int -conaddr (int fd, ip_address *ip) +socket_ip_address (int sock, ip_address *ip, int endpoint) { struct sockaddr_storage storage; struct sockaddr *sockaddr = (struct sockaddr *)&storage; socklen_t addrlen = sizeof (storage); + int ret; - if (getsockname (fd, sockaddr, &addrlen) < 0) + if (endpoint == ENDPOINT_LOCAL) + ret = getsockname (sock, sockaddr, &addrlen); + else if (endpoint == ENDPOINT_PEER) + ret = getpeername (sock, sockaddr, &addrlen); + else + abort (); + if (ret < 0) return 0; switch (sockaddr->sa_family) @@ -556,57 +549,398 @@ conaddr (int fd, ip_address *ip) return 0; } -/* Read at most LEN bytes from FD, storing them to BUF. This is - virtually the same as read(), but takes care of EINTR braindamage - and uses select() to timeout the stale connections (a connection is - stale if more than OPT.READ_TIMEOUT time is spent in select() or - read()). */ +/* Return non-zero if the error from the connect code can be + considered retryable. Wget normally retries after errors, but the + exception are the "unsupported protocol" type errors (possible on + IPv4/IPv6 dual family systems) and "connection refused". */ int -iread (int fd, char *buf, int len) +retryable_socket_connect_error (int err) { - int res; + /* Have to guard against some of these values not being defined. + Cannot use a switch statement because some of the values might be + equal. */ + if (0 +#ifdef EAFNOSUPPORT + || err == EAFNOSUPPORT +#endif +#ifdef EPFNOSUPPORT + || err == EPFNOSUPPORT +#endif +#ifdef ESOCKTNOSUPPORT /* no, "sockt" is not a typo! */ + || err == ESOCKTNOSUPPORT +#endif +#ifdef EPROTONOSUPPORT + || err == EPROTONOSUPPORT +#endif +#ifdef ENOPROTOOPT + || err == ENOPROTOOPT +#endif + /* Apparently, older versions of Linux and BSD used EINVAL + instead of EAFNOSUPPORT and such. 
*/ + || err == EINVAL + ) + return 0; + + if (err == ECONNREFUSED && !opt.retry_connrefused) + return 0; + + return 1; +} + +#ifdef ENABLE_IPV6 +# ifndef HAVE_GETADDRINFO_AI_ADDRCONFIG + +/* Return non-zero if the INET6 socket family is supported on the + system. + + This doesn't guarantee that we're able to connect to IPv6 hosts, + but it's better than nothing. It is only used on systems where + getaddrinfo doesn't support AI_ADDRCONFIG. (See lookup_host.) */ +int +socket_has_inet6 (void) +{ + static int supported = -1; + if (supported == -1) + { + int sock = socket (AF_INET6, SOCK_STREAM, 0); + if (sock < 0) + supported = 0; + else + { + fd_close (sock); + supported = 1; + } + } + return supported; +} + +# endif/* not HAVE_GETADDRINFO_AI_ADDRCONFIG */ +#endif /* ENABLE_IPV6 */ + +/* Wait for a single descriptor to become available, timing out after + MAXTIME seconds. Returns 1 if FD is available, 0 for timeout and + -1 for error. The argument WAIT_FOR can be a combination of + WAIT_FOR_READ and WAIT_FOR_WRITE. + + This is a mere convenience wrapper around the select call, and + should be taken as such (for example, it doesn't implement Wget's + 0-timeout-means-no-timeout semantics.) */ + +int +select_fd (int fd, double maxtime, int wait_for) +{ #ifdef HAVE_SELECT - if (opt.read_timeout) - if (select_fd (fd, opt.read_timeout, WAIT_READ) <= 0) - return -1; + fd_set fdset; + fd_set *rd = NULL, *wr = NULL; + struct timeval tmout; + int result; + + FD_ZERO (&fdset); + FD_SET (fd, &fdset); + if (wait_for & WAIT_FOR_READ) + rd = &fdset; + if (wait_for & WAIT_FOR_WRITE) + wr = &fdset; + + tmout.tv_sec = (long) maxtime; + tmout.tv_usec = 1000000L * (maxtime - (long) maxtime); + + do + result = select (fd + 1, rd, wr, NULL, &tmout); + while (result < 0 && errno == EINTR); + + return result; + +#else /* not HAVE_SELECT */ + + /* If select() unavailable, just return 1. 
In most usages in Wget, + this is the appropriate response -- "if we can't poll, go ahead + with the blocking operation". If a specific part of code needs + different behavior, it can use #ifdef HAVE_SELECT to test whether + polling really occurs. */ + return 1; + +#endif /* not HAVE_SELECT */ +} + +int +test_socket_open (int sock) +{ +#ifdef HAVE_SELECT + fd_set check_set; + struct timeval to; + + /* Check if we still have a valid (non-EOF) connection. From Andrew + * Maholski's code in the Unix Socket FAQ. */ + + FD_ZERO (&check_set); + FD_SET (sock, &check_set); + + /* Wait one microsecond */ + to.tv_sec = 0; + to.tv_usec = 1; + + /* If we get a timeout, then that means still connected */ + if (select (sock + 1, &check_set, NULL, NULL, &to) == 0) + { + /* Connection is valid (not EOF), so continue */ + return 1; + } + else + return 0; +#else + /* Without select, it's hard to know for sure. */ + return 1; +#endif +} + +/* Basic socket operations, mostly EINTR wrappers. */ + +#ifdef WINDOWS +# define read(fd, buf, cnt) recv (fd, buf, cnt, 0) +# define write(fd, buf, cnt) send (fd, buf, cnt, 0) +# define close(fd) closesocket (fd) +#endif + +#ifdef __BEOS__ +# define read(fd, buf, cnt) recv (fd, buf, cnt, 0) +# define write(fd, buf, cnt) send (fd, buf, cnt, 0) #endif + +static int +sock_read (int fd, char *buf, int bufsize) +{ + int res; do - res = READ (fd, buf, len); + res = read (fd, buf, bufsize); while (res == -1 && errno == EINTR); + return res; +} +static int +sock_write (int fd, char *buf, int bufsize) +{ + int res; + do + res = write (fd, buf, bufsize); + while (res == -1 && errno == EINTR); + return res; +} + +static int +sock_poll (int fd, double timeout, int wait_for) +{ + return select_fd (fd, timeout, wait_for); +} + +static int +sock_peek (int fd, char *buf, int bufsize) +{ + int res; + do + res = recv (fd, buf, bufsize, MSG_PEEK); + while (res == -1 && errno == EINTR); return res; } -/* Write LEN bytes from BUF to FD. 
This is similar to iread(), but - unlike iread(), it makes sure that all of BUF is actually written - to FD, so callers needn't bother with checking that the return - value equals to LEN. Instead, you should simply check for -1. */ +static void +sock_close (int fd) +{ + close (fd); + DEBUGP (("Closed fd %d\n", fd)); +} +#undef read +#undef write +#undef close + +/* Reading and writing from the network. We build around the socket + (file descriptor) API, but support "extended" operations for things + that are not mere file descriptors under the hood, such as SSL + sockets. + + That way the user code can call fd_read(fd, ...) and we'll run read + or SSL_read or whatever is necessary. */ + +static struct hash_table *transport_map; +static int transport_map_modified_tick; + +struct transport_info { + fd_reader_t reader; + fd_writer_t writer; + fd_poller_t poller; + fd_peeker_t peeker; + fd_closer_t closer; + void *ctx; +}; + +/* Register the transport layer operations that will be used when + reading, writing, and polling FD. + + This should be used for transport layers like SSL that piggyback on + sockets. FD should otherwise be a real socket, on which you can + call getpeername, etc. */ + +void +fd_register_transport (int fd, fd_reader_t reader, fd_writer_t writer, + fd_poller_t poller, fd_peeker_t peeker, + fd_closer_t closer, void *ctx) +{ + struct transport_info *info; + + /* The file descriptor must be non-negative to be registered. + Negative values are ignored by fd_close(), and -1 cannot be used as + hash key. 
*/ + assert (fd >= 0); + + info = xnew (struct transport_info); + info->reader = reader; + info->writer = writer; + info->poller = poller; + info->peeker = peeker; + info->closer = closer; + info->ctx = ctx; + if (!transport_map) + transport_map = hash_table_new (0, NULL, NULL); + hash_table_put (transport_map, (void *) fd, info); + ++transport_map_modified_tick; +} + +/* When fd_read/fd_write are called multiple times in a loop, they should + remember the INFO pointer instead of fetching it every time. It is + not enough to compare FD to LAST_FD because FD might have been + closed and reopened. transport_map_modified_tick ensures that changes to + transport_map do not go unnoticed. + + This is a macro because we want the static storage variables to be + per-function. */ + +#define LAZY_RETRIEVE_INFO(info) do { \ + static struct transport_info *last_info; \ + static int last_fd = -1, last_tick; \ + if (!transport_map) \ + info = NULL; \ + else if (last_fd == fd && last_tick == transport_map_modified_tick) \ + info = last_info; \ + else \ + { \ + info = hash_table_get (transport_map, (void *) fd); \ + last_fd = fd; \ + last_info = info; \ + last_tick = transport_map_modified_tick; \ + } \ +} while (0) + +static int +poll_internal (int fd, struct transport_info *info, int wf, double timeout) +{ + if (timeout == -1) + timeout = opt.read_timeout; + if (timeout) + { + int test; + if (info && info->poller) + test = info->poller (fd, timeout, wf, info->ctx); + else + test = sock_poll (fd, timeout, wf); + if (test == 0) + errno = ETIMEDOUT; + if (test <= 0) + return 0; + } + return 1; +} + +/* Read no more than BUFSIZE bytes of data from FD, storing them to + BUF. If TIMEOUT is non-zero, the operation aborts if no data is + received after that many seconds. If TIMEOUT is -1, the value of + opt.read_timeout is used for TIMEOUT. 
*/ + +int +fd_read (int fd, char *buf, int bufsize, double timeout) +{ + struct transport_info *info; + LAZY_RETRIEVE_INFO (info); + if (!poll_internal (fd, info, WAIT_FOR_READ, timeout)) + return -1; + if (info && info->reader) + return info->reader (fd, buf, bufsize, info->ctx); + else + return sock_read (fd, buf, bufsize); +} + +/* The same as fd_read, but don't actually read the data, just copy it + instead. */ + +int +fd_peek (int fd, char *buf, int bufsize, double timeout) +{ + struct transport_info *info; + LAZY_RETRIEVE_INFO (info); + if (!poll_internal (fd, info, WAIT_FOR_READ, timeout)) + return -1; + if (info && info->peeker) + return info->peeker (fd, buf, bufsize, info->ctx); + else + return sock_peek (fd, buf, bufsize); +} + +/* Write the entire contents of BUF to FD. If TIMEOUT is non-zero, + the operation aborts if FD is not ready for writing within that + many seconds. If TIMEOUT is -1, the value of opt.read_timeout is + used for TIMEOUT. */ int -iwrite (int fd, char *buf, int len) +fd_write (int fd, char *buf, int bufsize, double timeout) { - int res = 0; + int res; + struct transport_info *info; + LAZY_RETRIEVE_INFO (info); - /* `write' may write less than LEN bytes, thus the outward loop - keeps trying it until all was written, or an error occurred. The - inner loop is reserved for the usual EINTR f*kage, and the - innermost loop deals with the same during select(). */ - while (len > 0) + /* `write' may write less than BUFSIZE bytes, thus the loop keeps + trying it until all was written, or an error occurred. 
*/ + res = 0; + while (bufsize > 0) { -#ifdef HAVE_SELECT - if (opt.read_timeout) - if (select_fd (fd, opt.read_timeout, WAIT_WRITE) <= 0) - return -1; -#endif - do - res = WRITE (fd, buf, len); - while (res == -1 && errno == EINTR); + if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout)) + return -1; + if (info && info->writer) + res = info->writer (fd, buf, bufsize, info->ctx); + else + res = sock_write (fd, buf, bufsize); if (res <= 0) break; buf += res; - len -= res; + bufsize -= res; } return res; } + +/* Close the file descriptor FD. */ + +void +fd_close (int fd) +{ + struct transport_info *info; + if (fd < 0) + return; + + /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once + per socket, so that particular optimization wouldn't work. */ + info = NULL; + if (transport_map) + info = hash_table_get (transport_map, (void *) fd); + + if (info && info->closer) + info->closer (fd, info->ctx); + else + sock_close (fd); + + if (info) + { + hash_table_remove (transport_map, (void *) fd); + xfree (info); + ++transport_map_modified_tick; + } +}