1 /* Establishing and handling network connections.
2 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
39 #include <sys/socket.h>
40 #include <sys/select.h>
45 # else /* def __VMS */
47 # endif /* def __VMS [else] */
48 # include <netinet/in.h>
50 # include <arpa/inet.h>
52 #endif /* not WINDOWS */
62 /* Apparently needed for Interix: */
67 /* Define sockaddr_storage where unavailable (presumably on IPv4-only
71 # ifndef HAVE_STRUCT_SOCKADDR_STORAGE
72 # define sockaddr_storage sockaddr_in
74 #endif /* ENABLE_IPV6 */
76 /* Fill SA as per the data in IP and PORT. SA should point to struct
77 sockaddr_storage if ENABLE_IPV6 is defined, to struct sockaddr_in
81 sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port)
87 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
89 sin->sin_family = AF_INET;
90 sin->sin_port = htons (port);
91 sin->sin_addr = ip->data.d4;
97 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
99 sin6->sin6_family = AF_INET6;
100 sin6->sin6_port = htons (port);
101 sin6->sin6_addr = ip->data.d6;
102 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
103 sin6->sin6_scope_id = ip->ipv6_scope;
107 #endif /* ENABLE_IPV6 */
113 /* Get the data of SA, specifically the IP address and the port. If
114 you're not interested in one or the other information, pass NULL as
118 sockaddr_get_data (const struct sockaddr *sa, ip_address *ip, int *port)
120 switch (sa->sa_family)
124 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
127 ip->family = AF_INET;
128 ip->data.d4 = sin->sin_addr;
131 *port = ntohs (sin->sin_port);
137 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
140 ip->family = AF_INET6;
141 ip->data.d6 = sin6->sin6_addr;
142 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
143 ip->ipv6_scope = sin6->sin6_scope_id;
147 *port = ntohs (sin6->sin6_port);
156 /* Return the size of the sockaddr structure depending on its
160 sockaddr_size (const struct sockaddr *sa)
162 switch (sa->sa_family)
165 return sizeof (struct sockaddr_in);
168 return sizeof (struct sockaddr_in6);
175 /* Resolve the bind address specified via --bind-address and store it
176 to SA. The resolved value is stored in a static variable and
177 reused after the first invocation of this function.
179 Returns true on success, false on failure. */
182 resolve_bind_address (struct sockaddr *sa)
184 struct address_list *al;
186 /* Make sure this is called only once. opt.bind_address doesn't
187 change during a Wget run. */
188 static bool called, should_bind;
189 static ip_address ip;
193 sockaddr_set_data (sa, &ip, 0);
198 al = lookup_host (opt.bind_address, LH_BIND | LH_SILENT);
201 /* #### We should be able to print the error message here. */
202 logprintf (LOG_NOTQUIET,
203 _("%s: unable to resolve bind address %s; disabling bind.\n"),
204 exec_name, quote (opt.bind_address));
209 /* Pick the first address in the list and use it as bind address.
210 Perhaps we should try multiple addresses in succession, but I
211 don't think that's necessary in practice. */
212 ip = *address_list_address_at (al, 0);
213 address_list_release (al);
215 sockaddr_set_data (sa, &ip, 0);
222 const struct sockaddr *addr;
228 connect_with_timeout_callback (void *arg)
230 struct cwt_context *ctx = (struct cwt_context *)arg;
231 ctx->result = connect (ctx->fd, ctx->addr, ctx->addrlen);
234 /* Like connect, but specifies a timeout. If connecting takes longer
235 than TIMEOUT seconds, -1 is returned and errno is set to
239 connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen,
242 struct cwt_context ctx;
245 ctx.addrlen = addrlen;
247 if (run_with_timeout (timeout, connect_with_timeout_callback, &ctx))
252 if (ctx.result == -1 && errno == EINTR)
257 /* Connect via TCP to the specified address and port.
259 If PRINT is non-NULL, it is the host name to print that we're
263 connect_to_ip (const ip_address *ip, int port, const char *print)
265 struct sockaddr_storage ss;
266 struct sockaddr *sa = (struct sockaddr *)&ss;
269 /* If PRINT is non-NULL, print the "Connecting to..." line, with
270 PRINT being the host name we're connecting to. */
273 const char *txt_addr = print_address (ip);
274 if (0 != strcmp (print, txt_addr))
276 char *str = NULL, *name;
278 if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
280 int len = strlen (print) + strlen (name) + 4;
282 snprintf (str, len, "%s (%s)", name, print);
287 logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
288 str ? str : escnonprint_uri (print), txt_addr, port);
295 if (ip->family == AF_INET)
296 logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
297 else if (ip->family == AF_INET6)
298 logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port);
302 /* Store the sockaddr info to SA. */
303 sockaddr_set_data (sa, ip, port);
305 /* Create the socket of the family appropriate for the address. */
306 sock = socket (sa->sa_family, SOCK_STREAM, 0);
310 #if defined(ENABLE_IPV6) && defined(IPV6_V6ONLY)
313 /* In case of error, we will go on anyway... */
314 int err = setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof (on));
317 DEBUGP (("Failed setting IPV6_V6ONLY: %s", strerror (errno)));
321 /* For very small rate limits, set the buffer size (and hence,
322 hopefully, the kernel's TCP window size) to the per-second limit.
323 That way we should never have to sleep for more than 1s between
325 if (opt.limit_rate && opt.limit_rate < 8192)
327 int bufsize = opt.limit_rate;
329 bufsize = 512; /* avoid pathologically small values */
331 setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
332 (void *)&bufsize, (socklen_t)sizeof (bufsize));
334 /* When we add limit_rate support for writing, which is useful
335 for POST, we should also set SO_SNDBUF here. */
338 if (opt.bind_address)
340 /* Bind the client side of the socket to the requested
342 struct sockaddr_storage bind_ss;
343 struct sockaddr *bind_sa = (struct sockaddr *)&bind_ss;
344 if (resolve_bind_address (bind_sa))
346 if (bind (sock, bind_sa, sockaddr_size (bind_sa)) < 0)
351 /* Connect the socket to the remote endpoint. */
352 if (connect_with_timeout (sock, sa, sockaddr_size (sa),
353 opt.connect_timeout) < 0)
359 logprintf (LOG_VERBOSE, _("connected.\n"));
360 DEBUGP (("Created socket %d.\n", sock));
365 /* Protect errno from possible modifications by close and
367 int save_errno = errno;
371 logprintf (LOG_VERBOSE, _("failed: %s.\n"), strerror (errno));
377 /* Connect via TCP to a remote host on the specified port.
379 HOST is resolved as an Internet host name. If HOST resolves to
380 more than one IP address, they are tried in the order returned by
381 DNS until connecting to one of them succeeds. */
384 connect_to_host (const char *host, int port)
389 struct address_list *al = lookup_host (host, 0);
394 logprintf (LOG_NOTQUIET,
395 _("%s: unable to resolve host address %s\n"),
396 exec_name, quote (host));
400 address_list_get_bounds (al, &start, &end);
401 for (i = start; i < end; i++)
403 const ip_address *ip = address_list_address_at (al, i);
404 sock = connect_to_ip (ip, port, host);
408 address_list_set_connected (al);
409 address_list_release (al);
413 /* The attempt to connect has failed. Continue with the loop
414 and try next address. */
416 address_list_set_faulty (al, i);
419 /* Failed to connect to any of the addresses in AL. */
421 if (address_list_connected_p (al))
423 /* We connected to AL before, but cannot do so now. That might
424 indicate that our DNS cache entry for HOST has expired. */
425 address_list_release (al);
426 al = lookup_host (host, LH_REFRESH);
429 address_list_release (al);
434 /* Create a socket, bind it to local interface BIND_ADDRESS on port
435 *PORT, set up a listen backlog, and return the resulting socket, or
438 BIND_ADDRESS is the address of the interface to bind to. If it is
439 NULL, the socket is bound to the default address. PORT should
440 point to the port number that will be used for the binding. If
441 that number is 0, the system will choose a suitable port, and the
442 chosen value will be written to *PORT.
444 Calling accept() on such a socket waits for and accepts incoming
448 bind_local (const ip_address *bind_address, int *port)
451 struct sockaddr_storage ss;
452 struct sockaddr *sa = (struct sockaddr *)&ss;
454 /* For setting options with setsockopt. */
456 void *setopt_ptr = (void *)&setopt_val;
457 socklen_t setopt_size = sizeof (setopt_val);
459 sock = socket (bind_address->family, SOCK_STREAM, 0);
464 setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size);
468 sockaddr_set_data (sa, bind_address, *port);
469 if (bind (sock, sa, sockaddr_size (sa)) < 0)
474 DEBUGP (("Local socket fd %d bound.\n", sock));
476 /* If *PORT is 0, find out which port we've bound to. */
479 socklen_t addrlen = sockaddr_size (sa);
480 if (getsockname (sock, sa, &addrlen) < 0)
482 /* If we can't find out the socket's local address ("name"),
483 something is seriously wrong with the socket, and it's
484 unusable for us anyway because we must know the chosen
489 sockaddr_get_data (sa, NULL, port);
490 DEBUGP (("binding to address %s using port %i.\n",
491 print_address (bind_address), *port));
493 if (listen (sock, 1) < 0)
501 /* Like a call to accept(), but with the added check for timeout.
503 In other words, accept a client connection on LOCAL_SOCK, and
504 return the new socket used for communication with the client.
505 LOCAL_SOCK should have been bound, e.g. using bind_local().
507 The caller is blocked until a connection is established. If no
508 connection is established for opt.connect_timeout seconds, the
509 function exits with an error status. */
512 accept_connection (int local_sock)
516 /* We don't need the values provided by accept, but accept
517 apparently requires them to be present. */
518 struct sockaddr_storage ss;
519 struct sockaddr *sa = (struct sockaddr *)&ss;
520 socklen_t addrlen = sizeof (ss);
522 if (opt.connect_timeout)
524 int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ);
530 sock = accept (local_sock, sa, &addrlen);
531 DEBUGP (("Accepted client at socket %d.\n", sock));
535 /* Get the IP address associated with the connection on SOCK and store
536 it in IP. Return true on success, false otherwise.
538 If ENDPOINT is ENDPOINT_LOCAL, it returns the address of the local
539 (client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it
540 returns the address of the remote (peer's) side of the socket. */
543 socket_ip_address (int sock, ip_address *ip, int endpoint)
545 struct sockaddr_storage storage;
546 struct sockaddr *sockaddr = (struct sockaddr *) &storage;
547 socklen_t addrlen = sizeof (storage);
550 memset (sockaddr, 0, addrlen);
551 if (endpoint == ENDPOINT_LOCAL)
552 ret = getsockname (sock, sockaddr, &addrlen);
553 else if (endpoint == ENDPOINT_PEER)
554 ret = getpeername (sock, sockaddr, &addrlen);
560 ip->family = sockaddr->sa_family;
561 switch (sockaddr->sa_family)
566 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&storage;
567 ip->data.d6 = sa6->sin6_addr;
568 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
569 ip->ipv6_scope = sa6->sin6_scope_id;
571 DEBUGP (("conaddr is: %s\n", print_address (ip)));
577 struct sockaddr_in *sa = (struct sockaddr_in *)&storage;
578 ip->data.d4 = sa->sin_addr;
579 DEBUGP (("conaddr is: %s\n", print_address (ip)));
587 /* Get the socket family of the connection on SOCK.
588 Return the family type on success, -1 otherwise.
590 If ENDPOINT is ENDPOINT_LOCAL, it returns the sock family of the local
591 (client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it
592 returns the sock family of the remote (peer's) side of the socket. */
595 socket_family (int sock, int endpoint)
597 struct sockaddr_storage storage;
598 struct sockaddr *sockaddr = (struct sockaddr *) &storage;
599 socklen_t addrlen = sizeof (storage);
602 memset (sockaddr, 0, addrlen);
604 if (endpoint == ENDPOINT_LOCAL)
605 ret = getsockname (sock, sockaddr, &addrlen);
606 else if (endpoint == ENDPOINT_PEER)
607 ret = getpeername (sock, sockaddr, &addrlen);
614 return sockaddr->sa_family;
617 /* Return true if the error from the connect code can be considered
618 retryable. Wget normally retries after errors, but the exceptions
619 are the "unsupported protocol" type errors (possible on IPv4/IPv6
620 dual family systems) and, unless opt.retry_connrefused is set, "connection refused" and the network/host unreachable errors. */
623 retryable_socket_connect_error (int err)
625 /* Have to guard against some of these values not being defined.
626 Cannot use a switch statement because some of the values might be
630 || err == EAFNOSUPPORT
633 || err == EPFNOSUPPORT
635 #ifdef ESOCKTNOSUPPORT /* no, "sockt" is not a typo! */
636 || err == ESOCKTNOSUPPORT
638 #ifdef EPROTONOSUPPORT
639 || err == EPROTONOSUPPORT
642 || err == ENOPROTOOPT
644 /* Apparently, older versions of Linux and BSD used EINVAL
645 instead of EAFNOSUPPORT and such. */
650 if (!opt.retry_connrefused)
651 if (err == ECONNREFUSED
653 || err == ENETUNREACH /* network is unreachable */
656 || err == EHOSTUNREACH /* host is unreachable */
664 /* Wait for a single descriptor to become available, timing out after
665 MAXTIME seconds. Returns 1 if FD is available, 0 for timeout and
666 -1 for error. The argument WAIT_FOR can be a combination of
667 WAIT_FOR_READ and WAIT_FOR_WRITE.
669 This is a mere convenience wrapper around the select call, and
670 should be taken as such (for example, it doesn't implement Wget's
671 0-timeout-means-no-timeout semantics.) */
674 select_fd (int fd, double maxtime, int wait_for)
677 fd_set *rd = NULL, *wr = NULL;
678 struct timeval tmout;
683 if (wait_for & WAIT_FOR_READ)
685 if (wait_for & WAIT_FOR_WRITE)
688 tmout.tv_sec = (long) maxtime;
689 tmout.tv_usec = 1000000 * (maxtime - (long) maxtime);
693 result = select (fd + 1, rd, wr, NULL, &tmout);
695 /* gnulib select() converts blocking sockets to nonblocking in windows.
696 wget uses blocking sockets so we must convert them back to blocking. */
697 set_windows_fd_as_blocking_socket (fd);
700 while (result < 0 && errno == EINTR);
705 /* Return true iff the connection to the remote site established
706 through SOCK is still open.
708 Specifically, this function returns true if SOCK is not ready for
709 reading. This is because, when the connection closes, the socket
710 is ready for reading because EOF is about to be delivered. A side
711 effect of this method is that sockets that have pending data are
712 considered non-open. This is actually a good thing for callers of
713 this function, where such pending data can only be unwanted
714 leftover from a previous request. */
717 test_socket_open (int sock)
723 /* Check if we still have a valid (non-EOF) connection. From Andrew
724 * Maholski's code in the Unix Socket FAQ. */
726 FD_ZERO (&check_set);
727 FD_SET (sock, &check_set);
729 /* Wait one microsecond */
733 ret = select (sock + 1, &check_set, NULL, NULL, &to);
735 /* gnulib select() converts blocking sockets to nonblocking in windows.
736 wget uses blocking sockets so we must convert them back to blocking
738 set_windows_fd_as_blocking_socket ( sock );
742 /* We got a timeout, it means we're still connected. */
745 /* Read now would not wait, it means we have either pending data
750 /* Basic socket operations, mostly EINTR wrappers. */
753 sock_read (int fd, char *buf, int bufsize)
757 res = read (fd, buf, bufsize);
758 while (res == -1 && errno == EINTR);
763 sock_write (int fd, char *buf, int bufsize)
767 res = write (fd, buf, bufsize);
768 while (res == -1 && errno == EINTR);
773 sock_poll (int fd, double timeout, int wait_for)
775 return select_fd (fd, timeout, wait_for);
779 sock_peek (int fd, char *buf, int bufsize)
783 res = recv (fd, buf, bufsize, MSG_PEEK);
784 while (res == -1 && errno == EINTR);
792 DEBUGP (("Closed fd %d\n", fd));
798 /* Reading and writing from the network. We build around the socket
799 (file descriptor) API, but support "extended" operations for things
800 that are not mere file descriptors under the hood, such as SSL
803 That way the user code can call fd_read(fd, ...) and we'll run read
804 or SSL_read or whatever is necessary. */
806 static struct hash_table *transport_map;
807 static unsigned int transport_map_modified_tick;
809 struct transport_info {
810 struct transport_implementation *imp;
814 /* Register the transport layer operations that will be used when
815 reading, writing, and polling FD.
817 This should be used for transport layers like SSL that piggyback on
818 sockets. FD should otherwise be a real socket, on which you can
819 call getpeername, etc. */
822 fd_register_transport (int fd, struct transport_implementation *imp, void *ctx)
824 struct transport_info *info;
826 /* The file descriptor must be non-negative to be registered.
827 Negative values are ignored by fd_close(), and -1 cannot be used as
831 info = xnew (struct transport_info);
835 transport_map = hash_table_new (0, NULL, NULL);
836 hash_table_put (transport_map, (void *)(intptr_t) fd, info);
837 ++transport_map_modified_tick;
840 /* Return context of the transport registered with
841 fd_register_transport. This assumes fd_register_transport was
842 previously called on FD. */
845 fd_transport_context (int fd)
847 struct transport_info *info = hash_table_get (transport_map, (void *)(intptr_t) fd);
851 /* When fd_read/fd_write are called multiple times in a loop, they should
852 remember the INFO pointer instead of fetching it every time. It is
853 not enough to compare FD to LAST_FD because FD might have been
854 closed and reopened. transport_map_modified_tick ensures that changes to
855 transport_map do not go unnoticed.
857 This is a macro because we want the static storage variables to be
860 #define LAZY_RETRIEVE_INFO(info) do { \
861 static struct transport_info *last_info; \
862 static int last_fd = -1; \
863 static unsigned int last_tick; \
864 if (!transport_map) \
866 else if (last_fd == fd && last_tick == transport_map_modified_tick) \
870 info = hash_table_get (transport_map, (void *)(intptr_t) fd); \
873 last_tick = transport_map_modified_tick; \
878 poll_internal (int fd, struct transport_info *info, int wf, double timeout)
881 timeout = opt.read_timeout;
885 if (info && info->imp->poller)
886 test = info->imp->poller (fd, timeout, wf, info->ctx);
888 test = sock_poll (fd, timeout, wf);
897 /* Read no more than BUFSIZE bytes of data from FD, storing them to
898 BUF. If TIMEOUT is non-zero, the operation aborts if no data is
899 received after that many seconds. If TIMEOUT is -1, the value of
900 opt.read_timeout is used for TIMEOUT. */
903 fd_read (int fd, char *buf, int bufsize, double timeout)
905 struct transport_info *info;
906 LAZY_RETRIEVE_INFO (info);
907 if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
909 if (info && info->imp->reader)
910 return info->imp->reader (fd, buf, bufsize, info->ctx);
912 return sock_read (fd, buf, bufsize);
915 /* Like fd_read, except it provides a "preview" of the data that will
916 be read by subsequent calls to fd_read. Specifically, it copies no
917 more than BUFSIZE bytes of the currently available data to BUF and
918 returns the number of bytes copied. Return values and timeout
919 semantics are the same as those of fd_read.
921 CAVEAT: Do not assume that the first subsequent call to fd_read
922 will retrieve the same amount of data. Reading can return more or
923 less data, depending on the TCP implementation and other
924 circumstances. However, barring an error, it can be expected that
925 all the peeked data will eventually be read by fd_read. */
928 fd_peek (int fd, char *buf, int bufsize, double timeout)
930 struct transport_info *info;
931 LAZY_RETRIEVE_INFO (info);
932 if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
934 if (info && info->imp->peeker)
935 return info->imp->peeker (fd, buf, bufsize, info->ctx);
937 return sock_peek (fd, buf, bufsize);
940 /* Write the entire contents of BUF to FD. If TIMEOUT is non-zero,
941 the operation aborts if FD does not become writable within that many
942 seconds. If TIMEOUT is -1, the value of opt.read_timeout is used for
946 fd_write (int fd, char *buf, int bufsize, double timeout)
949 struct transport_info *info;
950 LAZY_RETRIEVE_INFO (info);
952 /* `write' may write less than LEN bytes, thus the loop keeps trying
953 it until all was written, or an error occurred. */
957 if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout))
959 if (info && info->imp->writer)
960 res = info->imp->writer (fd, buf, bufsize, info->ctx);
962 res = sock_write (fd, buf, bufsize);
971 /* Report the most recent error(s) on FD. This should only be called
972 after fd_* functions, such as fd_read and fd_write, and only if
973 they return a negative result. For errors coming from other calls
974 such as setsockopt or fopen, strerror should continue to be
977 If the transport doesn't support error messages or doesn't supply
978 one, strerror(errno) is returned. The returned error message
979 should not be used after fd_close has been called. */
984 /* Don't bother with LAZY_RETRIEVE_INFO, as this will only be called
985 in case of error, never in a tight loop. */
986 struct transport_info *info = NULL;
988 info = hash_table_get (transport_map, (void *)(intptr_t) fd);
990 if (info && info->imp->errstr)
992 const char *err = info->imp->errstr (fd, info->ctx);
995 /* else, fall through and print the system error. */
997 return strerror (errno);
1000 /* Close the file descriptor FD. */
1005 struct transport_info *info;
1009 /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once
1010 per socket, so that particular optimization wouldn't work. */
1013 info = hash_table_get (transport_map, (void *)(intptr_t) fd);
1015 if (info && info->imp->closer)
1016 info->imp->closer (fd, info->ctx);
1022 hash_table_remove (transport_map, (void *)(intptr_t) fd);
1024 ++transport_map_modified_tick;