X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhost.c;h=d3b8da84b25cf7039afb29301f38aeab6aeb7442;hp=601487aeecabb6e25209f80a867813102d90543f;hb=d5e283b1a75c5f8249300b465b4e7b55130bec49;hpb=38ea4d5dadc634855ee1027c21cc0b1769197014 diff --git a/src/host.c b/src/host.c index 601487ae..d3b8da84 100644 --- a/src/host.c +++ b/src/host.c @@ -1,12 +1,13 @@ /* Host name resolution and matching. - Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. +the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -14,123 +15,127 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget. If not, see . -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. */ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ -#ifndef WINDOWS -#include -#endif +#include "wget.h" #include #include -#ifdef HAVE_STRING_H -# include -#else -# include -#endif +#include #include -#include -#ifdef WINDOWS -# include -# define SET_H_ERRNO(err) WSASetLastError(err) -#else +#ifndef WINDOWS +# include # include # include # ifndef __BEOS__ # include # endif -# include +# ifdef __VMS +# include "vms_ip.h" +# else /* def __VMS */ +# include +# endif /* def __VMS [else] */ # define SET_H_ERRNO(err) ((void)(h_errno = (err))) +#else /* WINDOWS */ +# define SET_H_ERRNO(err) WSASetLastError (err) #endif /* WINDOWS */ -#ifndef NO_ADDRESS -#define NO_ADDRESS NO_DATA -#endif - -#ifdef HAVE_SYS_UTSNAME_H -# include -#endif #include -#include "wget.h" #include "utils.h" #include "host.h" #include "url.h" #include "hash.h" -#ifndef errno -extern int errno; +#ifndef NO_ADDRESS +# define NO_ADDRESS NO_DATA #endif -#ifndef h_errno -# ifndef __CYGWIN__ +#if !HAVE_DECL_H_ERRNO extern int h_errno; -# endif -#endif - -#ifdef ENABLE_IPV6 -int ip_default_family = AF_INET6; -#else -int ip_default_family = AF_INET; #endif -/* Mapping between known hosts and to lists of their addresses. */ -static struct hash_table *host_name_addresses_map; - -/* Lists of addresses. This should eventually be extended to handle - IPv6. */ +/* Lists of IP addresses that result from running DNS queries. See + lookup_host for details. */ struct address_list { - int count; /* number of adrresses */ - ip_address *addresses; /* pointer to the string of addresses */ + int count; /* number of adrresses */ + ip_address *addresses; /* pointer to the string of addresses */ - int faulty; /* number of addresses known not to work. */ - int refcount; /* so we know whether to free it or not. */ + int faulty; /* number of addresses known not to work. */ + bool connected; /* whether we were able to connect to + one of the addresses in the list, + at least once. */ + + int refcount; /* reference count; when it drops to + 0, the entry is freed. */ }; /* Get the bounds of the address list. */ void -address_list_get_bounds (struct address_list *al, int *start, int *end) +address_list_get_bounds (const struct address_list *al, int *start, int *end) { *start = al->faulty; *end = al->count; } -/* Copy address number INDEX to IP_STORE. */ +/* Return a pointer to the address at position POS. */ -void -address_list_copy_one (struct address_list *al, int index, ip_address *ip_store) +const ip_address * +address_list_address_at (const struct address_list *al, int pos) { - assert (index >= al->faulty && index < al->count); - memcpy (ip_store, al->addresses + index, sizeof (ip_address)); + assert (pos >= al->faulty && pos < al->count); + return al->addresses + pos; } -/* Check whether two address lists have all their IPs in common. */ +/* Return true if AL contains IP, false otherwise. */ -int -address_list_match_all (struct address_list *al1, struct address_list *al2) +bool +address_list_contains (const struct address_list *al, const ip_address *ip) { - if (al1 == al2) - return 1; - if (al1->count != al2->count) - return 0; - return 0 == memcmp (al1->addresses, al2->addresses, - al1->count * sizeof (ip_address)); + int i; + switch (ip->family) + { + case AF_INET: + for (i = 0; i < al->count; i++) + { + ip_address *cur = al->addresses + i; + if (cur->family == AF_INET + && (cur->data.d4.s_addr == ip->data.d4.s_addr)) + return true; + } + return false; +#ifdef ENABLE_IPV6 + case AF_INET6: + for (i = 0; i < al->count; i++) + { + ip_address *cur = al->addresses + i; + if (cur->family == AF_INET6 +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + && cur->ipv6_scope == ip->ipv6_scope +#endif + && IN6_ARE_ADDR_EQUAL (&cur->data.d6, &ip->data.d6)) + return true; + } + return false; +#endif /* ENABLE_IPV6 */ + default: + abort (); + } } /* Mark the INDEXth element of AL as faulty, so that the next time @@ -153,95 +158,131 @@ address_list_set_faulty (struct address_list *al, int index) al->faulty = 0; } -#ifdef HAVE_GETADDRINFO -/** - * address_list_from_addrinfo - * - * This function transform an addrinfo links list in and address_list. - * - * Input: - * addrinfo* Linkt list of addrinfo - * - * Output: - * address_list* New allocated address_list - */ +/* Set the "connected" flag to true. This flag used by connect.c to + see if the host perhaps needs to be resolved again. */ + +void +address_list_set_connected (struct address_list *al) +{ + al->connected = true; +} + +/* Return the value of the "connected" flag. */ + +bool +address_list_connected_p (const struct address_list *al) +{ + return al->connected; +} + +#ifdef ENABLE_IPV6 + +/* Create an address_list from the addresses in the given struct + addrinfo. */ + static struct address_list * -address_list_from_addrinfo (struct addrinfo *ai) +address_list_from_addrinfo (const struct addrinfo *ai) { struct address_list *al; - struct addrinfo *ai_head = ai; - int cnt = 0; - int i; + const struct addrinfo *ptr; + int cnt; + ip_address *ip; - for (ai = ai_head; ai; ai = ai->ai_next) - if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6) + cnt = 0; + for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next) + if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6) ++cnt; if (cnt == 0) return NULL; - al = xmalloc (sizeof (struct address_list)); - al->addresses = xmalloc (cnt * sizeof (ip_address)); + al = xnew0 (struct address_list); + al->addresses = xnew_array (ip_address, cnt); al->count = cnt; - al->faulty = 0; al->refcount = 1; - for (i = 0, ai = ai_head; ai; ai = ai->ai_next) - if (ai->ai_family == AF_INET6) + ip = al->addresses; + for (ptr = ai; ptr != NULL; ptr = ptr->ai_next) + if (ptr->ai_family == AF_INET6) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ai->ai_addr; - memcpy (al->addresses + i, &sin6->sin6_addr, 16); - ++i; + const struct sockaddr_in6 *sin6 = + (const struct sockaddr_in6 *)ptr->ai_addr; + ip->family = AF_INET6; + ip->data.d6 = sin6->sin6_addr; +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + ip->ipv6_scope = sin6->sin6_scope_id; +#endif + ++ip; } - else if (ai->ai_family == AF_INET) + else if (ptr->ai_family == AF_INET) { - struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr; - map_ipv4_to_ip ((ip4_address *)&sin->sin_addr, al->addresses + i); - ++i; + const struct sockaddr_in *sin = + (const struct sockaddr_in *)ptr->ai_addr; + ip->family = AF_INET; + ip->data.d4 = sin->sin_addr; + ++ip; } - assert (i == cnt); + assert (ip - al->addresses == cnt); return al; } -#else -/* Create an address_list out of a NULL-terminated vector of - addresses, as returned by gethostbyname. */ -static struct address_list * -address_list_from_vector (char **h_addr_list) -{ - int count = 0, i; - struct address_list *al = xmalloc (sizeof (struct address_list)); +#define IS_IPV4(addr) (((const ip_address *) addr)->family == AF_INET) - while (h_addr_list[count]) - ++count; - assert (count > 0); - al->count = count; - al->faulty = 0; - al->addresses = xmalloc (count * sizeof (ip_address)); - al->refcount = 1; +/* Compare two IP addresses by family, giving preference to the IPv4 + address (sorting it first). In other words, return -1 if ADDR1 is + IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and + 0 otherwise. - for (i = 0; i < count; i++) - map_ipv4_to_ip ((ip4_address *)h_addr_list[i], al->addresses + i); + This is intended to be used as the comparator arg to a qsort-like + sorting function, which is why it accepts generic pointers. */ - return al; +static int +cmp_prefer_ipv4 (const void *addr1, const void *addr2) +{ + return !IS_IPV4 (addr1) - !IS_IPV4 (addr2); } -#endif -/* Like address_list_from_vector, but initialized with a single - address. */ +#define IS_IPV6(addr) (((const ip_address *) addr)->family == AF_INET6) + +/* Like the above, but give preference to the IPv6 address. */ + +static int +cmp_prefer_ipv6 (const void *addr1, const void *addr2) +{ + return !IS_IPV6 (addr1) - !IS_IPV6 (addr2); +} + +#else /* not ENABLE_IPV6 */ + +/* Create an address_list from a NULL-terminated vector of IPv4 + addresses. This kind of vector is returned by gethostbyname. */ static struct address_list * -address_list_from_single (ip_address *addr) +address_list_from_ipv4_addresses (char **vec) { - struct address_list *al = xmalloc (sizeof (struct address_list)); - al->count = 1; - al->faulty = 0; - al->addresses = xmalloc (sizeof (ip_address)); + int count, i; + struct address_list *al = xnew0 (struct address_list); + + count = 0; + while (vec[count]) + ++count; + assert (count > 0); + + al->addresses = xnew_array (ip_address, count); + al->count = count; al->refcount = 1; - memcpy (al->addresses, addr, sizeof (ip_address)); + + for (i = 0; i < count; i++) + { + ip_address *ip = &al->addresses[i]; + ip->family = AF_INET; + memcpy (IP_INADDR_DATA (ip), vec[i], 4); + } return al; } +#endif /* not ENABLE_IPV6 */ + static void address_list_delete (struct address_list *al) { @@ -249,229 +290,23 @@ address_list_delete (struct address_list *al) xfree (al); } +/* Mark the address list as being no longer in use. This will reduce + its reference count which will cause the list to be freed when the + count reaches 0. */ + void address_list_release (struct address_list *al) { --al->refcount; - DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount)); + DEBUGP (("Releasing 0x%0*lx (new refcount %d).\n", PTR_FORMAT (al), + al->refcount)); if (al->refcount <= 0) { - DEBUGP (("Deleting unused %p.\n", al)); + DEBUGP (("Deleting unused 0x%0*lx.\n", PTR_FORMAT (al))); address_list_delete (al); } } -/** - * wget_sockaddr_set_address - * - * This function takes an wget_sockaddr and fill in the protocol type, - * the port number and the address, there NULL in address means wildcard. - * Unsuported adress family will abort the whole programm. - * - * Input: - * wget_sockaddr* The space to be filled - * int The wished protocol - * unsigned short The port - * const ip_address The Binary IP adress - * - * Return: - * - Only modify 1. param - */ -void -wget_sockaddr_set_address (wget_sockaddr *sa, - int ip_family, unsigned short port, ip_address *addr) -{ - if (ip_family == AF_INET) - { - sa->sin.sin_family = ip_family; - sa->sin.sin_port = htons (port); - if (addr == NULL) - memset (&sa->sin.sin_addr, 0, sizeof(ip4_address)); - else - { - ip4_address addr4; - if (!map_ip_to_ipv4 (addr, &addr4)) - /* should the callers have prevented this? */ - abort (); - memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address)); - } - return; - } -#ifdef ENABLE_IPV6 - if (ip_family == AF_INET6) - { - sa->sin6.sin6_family = ip_family; - sa->sin6.sin6_port = htons (port); - if (addr == NULL) - memset (&sa->sin6.sin6_addr, 0 , 16); - else - memcpy (&sa->sin6.sin6_addr, addr, 16); - return; - } -#endif - abort(); -} - -/** - * wget_sockaddr_set_port - * - * This funtion only fill the port of the socket information. - * If the protocol is not supported nothing is done. - * Unsuported adress family will abort the whole programm. - * - * Require: - * that the IP-Protocol already is set. - * - * Input: - * wget_sockaddr* The space there port should be entered - * unsigned int The port that should be entered in host order - * - * Return: - * - Only modify 1. param - */ -void -wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port) -{ - if (sa->sa.sa_family == AF_INET) - { - sa->sin.sin_port = htons (port); - return; - } -#ifdef ENABLE_IPV6 - if (sa->sa.sa_family == AF_INET6) - { - sa->sin6.sin6_port = htons (port); - return; - } -#endif - abort(); -} - -/** - * wget_sockaddr_get_addr - * - * This function return the adress from an sockaddr as byte string. - * Unsuported adress family will abort the whole programm. - * - * Require: - * that the IP-Protocol already is set. - * - * Input: - * wget_sockaddr* Socket Information - * - * Output: - * unsigned char * IP address as byte string. - */ -void * -wget_sockaddr_get_addr (wget_sockaddr *sa) -{ - if (sa->sa.sa_family == AF_INET) - return &sa->sin.sin_addr; -#ifdef ENABLE_IPV6 - if (sa->sa.sa_family == AF_INET6) - return &sa->sin6.sin6_addr; -#endif - abort(); - /* unreached */ - return NULL; -} - -/** - * wget_sockaddr_get_port - * - * This function only return the port from the input structure - * Unsuported adress family will abort the whole programm. - * - * Require: - * that the IP-Protocol already is set. - * - * Input: - * wget_sockaddr* Information where to get the port - * - * Output: - * unsigned short Port Number in host order. - */ -unsigned short -wget_sockaddr_get_port (const wget_sockaddr *sa) -{ - if (sa->sa.sa_family == AF_INET) - return htons (sa->sin.sin_port); -#ifdef ENABLE_IPV6 - if (sa->sa.sa_family == AF_INET6) - return htons (sa->sin6.sin6_port); -#endif - abort(); - /* do not complain about return nothing */ - return -1; -} - -/** - * sockaddr_len - * - * This function return the length of the sockaddr corresponding to - * the acutall prefered protocol for (bind, connect etc...) - * Unsuported adress family will abort the whole programm. - * - * Require: - * that the IP-Protocol already is set. - * - * Input: - * - Public IP-Family Information - * - * Output: - * int structure length for socket options - */ -int -sockaddr_len () -{ - if (ip_default_family == AF_INET) - return sizeof (struct sockaddr_in); -#ifdef ENABLE_IPV6 - if (ip_default_family == AF_INET6) - return sizeof (struct sockaddr_in6); -#endif - abort(); - /* do not complain about return nothing */ - return 0; -} - -/** - * Map an IPv4 adress to the internal adress format. - */ -void -map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip) -{ -#ifdef ENABLE_IPV6 - static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}; - memcpy ((char *)ip + 12, ipv4 , 4); - memcpy ((char *)ip + 0, ipv64, 12); -#else - if ((char *)ip != (char *)ipv4) - memcpy (ip, ipv4, 4); -#endif -} - -/* Detect whether an IP adress represents an IPv4 address and, if so, - copy it to IPV4. 0 is returned on failure. - This operation always succeeds when Wget is compiled without IPv6. - If IPV4 is NULL, don't copy, just detect. */ - -int -map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4) -{ -#ifdef ENABLE_IPV6 - static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}; - if (0 != memcmp (ip, ipv64, 12)) - return 0; - if (ipv4) - memcpy (ipv4, (char *)ip + 12, 4); -#else - if (ipv4) - memcpy (ipv4, (char *)ip, 4); -#endif - return 1; -} - /* Versions of gethostbyname and getaddrinfo that support timeout. */ #ifndef ENABLE_IPV6 @@ -509,6 +344,26 @@ gethostbyname_with_timeout (const char *host_name, double timeout) return ctx.hptr; } +/* Print error messages for host errors. */ +static char * +host_errstr (int error) +{ + /* Can't use switch since some of these constants can be equal, + which makes the compiler complain about duplicate case + values. */ + if (error == HOST_NOT_FOUND + || error == NO_RECOVERY + || error == NO_DATA + || error == NO_ADDRESS) + return _("Unknown host"); + else if (error == TRY_AGAIN) + /* Message modeled after what gai_strerror returns in similar + circumstances. */ + return _("Temporary failure in name resolution"); + else + return _("Unknown error"); +} + #else /* ENABLE_IPV6 */ struct gaiwt_context { @@ -532,8 +387,8 @@ getaddrinfo_with_timeout_callback (void *arg) static int getaddrinfo_with_timeout (const char *node, const char *service, - const struct addrinfo *hints, struct addrinfo **res, - double timeout) + const struct addrinfo *hints, struct addrinfo **res, + double timeout) { struct gaiwt_context ctx; ctx.node = node; @@ -551,33 +406,201 @@ getaddrinfo_with_timeout (const char *node, const char *service, #endif /* ENABLE_IPV6 */ -/* Pretty-print ADDR. When compiled without IPv6, this is the same as - inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4 - address. */ +/* Return a textual representation of ADDR, i.e. the dotted quad for + IPv4 addresses, and the colon-separated list of hex words (with all + zeros omitted, etc.) for IPv6 addresses. */ -char * -pretty_print_address (ip_address *addr) +const char * +print_address (const ip_address *addr) { #ifdef ENABLE_IPV6 - ip4_address addr4; - static char buf[128]; - - if (map_ip_to_ipv4 (addr, &addr4)) - return inet_ntoa (*(struct in_addr *)&addr4); - - if (!inet_ntop (AF_INET6, addr, buf, sizeof (buf))) - return ""; + static char buf[64]; + if (!inet_ntop (addr->family, IP_INADDR_DATA (addr), buf, sizeof buf)) + snprintf (buf, sizeof buf, "", strerror (errno)); return buf; +#else + return inet_ntoa (addr->data.d4); #endif - return inet_ntoa (*(struct in_addr *)addr); } -/* Add host name HOST with the address ADDR_TEXT to the cache. - ADDR_LIST is a NULL-terminated list of addresses, as in struct - hostent. */ +/* The following two functions were adapted from glibc's + implementation of inet_pton, written by Paul Vixie. */ + +static bool +is_valid_ipv4_address (const char *str, const char *end) +{ + bool saw_digit = false; + int octets = 0; + int val = 0; + + while (str < end) + { + int ch = *str++; + + if (ch >= '0' && ch <= '9') + { + val = val * 10 + (ch - '0'); + + if (val > 255) + return false; + if (!saw_digit) + { + if (++octets > 4) + return false; + saw_digit = true; + } + } + else if (ch == '.' && saw_digit) + { + if (octets == 4) + return false; + val = 0; + saw_digit = false; + } + else + return false; + } + if (octets < 4) + return false; + + return true; +} + +bool +is_valid_ipv6_address (const char *str, const char *end) +{ + /* Use lower-case for these to avoid clash with system headers. */ + enum { + ns_inaddrsz = 4, + ns_in6addrsz = 16, + ns_int16sz = 2 + }; + + const char *curtok; + int tp; + const char *colonp; + bool saw_xdigit; + unsigned int val; + + tp = 0; + colonp = NULL; + + if (str == end) + return false; + + /* Leading :: requires some special handling. */ + if (*str == ':') + { + ++str; + if (str == end || *str != ':') + return false; + } + + curtok = str; + saw_xdigit = false; + val = 0; + + while (str < end) + { + int ch = *str++; + + /* if ch is a number, add it to val. */ + if (c_isxdigit (ch)) + { + val <<= 4; + val |= XDIGIT_TO_NUM (ch); + if (val > 0xffff) + return false; + saw_xdigit = true; + continue; + } + + /* if ch is a colon ... */ + if (ch == ':') + { + curtok = str; + if (!saw_xdigit) + { + if (colonp != NULL) + return false; + colonp = str + tp; + continue; + } + else if (str == end) + return false; + if (tp > ns_in6addrsz - ns_int16sz) + return false; + tp += ns_int16sz; + saw_xdigit = false; + val = 0; + continue; + } + + /* if ch is a dot ... */ + if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz) + && is_valid_ipv4_address (curtok, end) == 1) + { + tp += ns_inaddrsz; + saw_xdigit = false; + break; + } + + return false; + } + + if (saw_xdigit) + { + if (tp > ns_in6addrsz - ns_int16sz) + return false; + tp += ns_int16sz; + } + + if (colonp != NULL) + { + if (tp == ns_in6addrsz) + return false; + tp = ns_in6addrsz; + } + + if (tp != ns_in6addrsz) + return false; + + return true; +} + +/* Simple host cache, used by lookup_host to speed up resolving. The + cache doesn't handle TTL because Wget is a fairly short-lived + application. Refreshing is attempted when connect fails, though -- + see connect_to_host. */ + +/* Mapping between known hosts and to lists of their addresses. */ +static struct hash_table *host_name_addresses_map; + + +/* Return the host's resolved addresses from the cache, if + available. */ + +static struct address_list * +cache_query (const char *host) +{ + struct address_list *al; + if (!host_name_addresses_map) + return NULL; + al = hash_table_get (host_name_addresses_map, host); + if (al) + { + DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al)); + ++al->refcount; + return al; + } + return NULL; +} + +/* Cache the DNS lookup of HOST. Subsequent invocations of + lookup_host will return the cached value. */ static void -cache_host_lookup (const char *host, struct address_list *al) +cache_store (const char *host, struct address_list *al) { if (!host_name_addresses_map) host_name_addresses_map = make_nocase_string_hash_table (0); @@ -585,152 +608,257 @@ cache_host_lookup (const char *host, struct address_list *al) ++al->refcount; hash_table_put (host_name_addresses_map, xstrdup_lower (host), al); -#ifdef ENABLE_DEBUG - if (opt.debug) + IF_DEBUG { int i; debug_logprintf ("Caching %s =>", host); for (i = 0; i < al->count; i++) - debug_logprintf (" %s", pretty_print_address (al->addresses + i)); + debug_logprintf (" %s", print_address (al->addresses + i)); debug_logprintf ("\n"); } -#endif } +/* Remove HOST from the DNS cache. Does nothing is HOST is not in + the cache. */ + +static void +cache_remove (const char *host) +{ + struct address_list *al; + if (!host_name_addresses_map) + return; + al = hash_table_get (host_name_addresses_map, host); + if (al) + { + address_list_release (al); + hash_table_remove (host_name_addresses_map, host); + } +} + +/* Look up HOST in DNS and return a list of IP addresses. + + This function caches its result so that, if the same host is passed + the second time, the addresses are returned without DNS lookup. + (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to + globally disable caching.) + + The order of the returned addresses is affected by the setting of + opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are + placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed + at the beginning; otherwise, the order is left intact. The + relative order of addresses with the same family is left + undisturbed in either case. + + FLAGS can be a combination of: + LH_SILENT - don't print the "resolving ... done" messages. + LH_BIND - resolve addresses for use with bind, which under + IPv6 means to use AI_PASSIVE flag to getaddrinfo. + Passive lookups are not cached under IPv6. + LH_REFRESH - if HOST is cached, remove the entry from the cache + and resolve it anew. */ + struct address_list * -lookup_host (const char *host, int silent) +lookup_host (const char *host, int flags) { - struct address_list *al = NULL; - unsigned long addr_ipv4; /* #### use a 32-bit type here. */ - ip_address addr; + struct address_list *al; + bool silent = !!(flags & LH_SILENT); + bool use_cache; + bool numeric_address = false; + double timeout = opt.dns_timeout; - /* First, try to check whether the address is already a numeric - address. */ +#ifndef ENABLE_IPV6 + /* If we're not using getaddrinfo, first check if HOST specifies a + numeric IPv4 address. Some implementations of gethostbyname + (e.g. the Ultrix one and possibly Winsock) don't accept + dotted-decimal IPv4 addresses. */ + { + uint32_t addr_ipv4 = (uint32_t)inet_addr (host); + if (addr_ipv4 != (uint32_t) -1) + { + /* No need to cache host->addr relation, just return the + address. */ + char *vec[2]; + vec[0] = (char *)&addr_ipv4; + vec[1] = NULL; + return address_list_from_ipv4_addresses (vec); + } + } +#else /* ENABLE_IPV6 */ + /* If we're using getaddrinfo, at least check whether the address is + already numeric, in which case there is no need to print the + "Resolving..." output. (This comes at no additional cost since + the is_valid_ipv*_address are already required for + url_parse.) */ + { + const char *end = host + strlen (host); + if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end)) + numeric_address = true; + } +#endif + /* Cache is normally on, but can be turned off with --no-dns-cache. + Don't cache passive lookups under IPv6. */ + use_cache = opt.dns_cache; #ifdef ENABLE_IPV6 - if (inet_pton (AF_INET6, host, &addr) > 0) - return address_list_from_single (&addr); + if ((flags & LH_BIND) || numeric_address) + use_cache = false; #endif - addr_ipv4 = (unsigned long)inet_addr (host); - if ((int)addr_ipv4 != -1) + /* Try to find the host in the cache so we don't need to talk to the + resolver. If LH_REFRESH is requested, remove HOST from the cache + instead. */ + if (use_cache) { - /* ADDR is defined to be in network byte order, which is what - this returns, so we can just copy it to STORE_IP. However, - on big endian 64-bit architectures the value will be stored - in the *last*, not first four bytes. OFFSET makes sure that - we copy the correct four bytes. */ - int offset = 0; -#ifdef WORDS_BIGENDIAN - offset = sizeof (unsigned long) - sizeof (ip4_address); -#endif - map_ipv4_to_ip ((ip4_address *)((char *)&addr_ipv4 + offset), &addr); - return address_list_from_single (&addr); + if (!(flags & LH_REFRESH)) + { + al = cache_query (host); + if (al) + return al; + } + else + cache_remove (host); } - if (host_name_addresses_map) + /* No luck with the cache; resolve HOST. */ + + if (!silent && !numeric_address) { - al = hash_table_get (host_name_addresses_map, host); - - if (al) - { - DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al)); - ++al->refcount; - return al; - } + char *str = NULL, *name; + + if (opt.enable_iri && (name = idn_decode ((char *) host)) != NULL) + { + int len = strlen (host) + strlen (name) + 4; + str = xmalloc (len); + snprintf (str, len, "%s (%s)", name, host); + str[len-1] = '\0'; + xfree (name); + } + + logprintf (LOG_VERBOSE, _("Resolving %s... "), + quotearg_style (escape_quoting_style, str ? str : host)); + + if (str) + xfree (str); } - if (!silent) - logprintf (LOG_VERBOSE, _("Resolving %s... "), host); - - /* Host name lookup goes on below. */ - -#ifdef HAVE_GETADDRINFO +#ifdef ENABLE_IPV6 { - struct addrinfo hints, *ai; int err; + struct addrinfo hints, *res; - memset (&hints, 0, sizeof (hints)); - if (ip_default_family == AF_INET) - hints.ai_family = AF_INET; - else - hints.ai_family = PF_UNSPEC; + xzero (hints); hints.ai_socktype = SOCK_STREAM; - err = getaddrinfo_with_timeout (host, NULL, &hints, &ai, opt.dns_timeout); + if (opt.ipv4_only) + hints.ai_family = AF_INET; + else if (opt.ipv6_only) + hints.ai_family = AF_INET6; + else + /* We tried using AI_ADDRCONFIG, but removed it because: it + misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and + it's unneeded since we sort the addresses anyway. */ + hints.ai_family = AF_UNSPEC; + + if (flags & LH_BIND) + hints.ai_flags |= AI_PASSIVE; - if (err != 0 || ai == NULL) +#ifdef AI_NUMERICHOST + if (numeric_address) + { + /* Where available, the AI_NUMERICHOST hint can prevent costly + access to DNS servers. */ + hints.ai_flags |= AI_NUMERICHOST; + timeout = 0; /* no timeout needed when "resolving" + numeric hosts -- avoid setting up + signal handlers and such. */ + } +#endif + + err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout); + if (err != 0 || res == NULL) { if (!silent) - logprintf (LOG_VERBOSE, _("failed: %s.\n"), - err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno)); + logprintf (LOG_VERBOSE, _("failed: %s.\n"), + err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno)); return NULL; } - al = address_list_from_addrinfo (ai); - freeaddrinfo (ai); + al = address_list_from_addrinfo (res); + freeaddrinfo (res); + if (!al) + { + logprintf (LOG_VERBOSE, + _("failed: No IPv4/IPv6 addresses for host.\n")); + return NULL; + } + + /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per + --prefer-family) come first. Sorting is stable so the order of + the addresses with the same family is undisturbed. */ + if (al->count > 1 && opt.prefer_family != prefer_none) + stable_sort (al->addresses, al->count, sizeof (ip_address), + opt.prefer_family == prefer_ipv4 + ? cmp_prefer_ipv4 : cmp_prefer_ipv6); } -#else +#else /* not ENABLE_IPV6 */ { - struct hostent *hptr; - hptr = gethostbyname_with_timeout (host, opt.dns_timeout); + struct hostent *hptr = gethostbyname_with_timeout (host, timeout); if (!hptr) { - if (!silent) - { - if (errno != ETIMEDOUT) - logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno)); - else - logputs (LOG_VERBOSE, _("failed: timed out.\n")); - } - return NULL; + if (!silent) + { + if (errno != ETIMEDOUT) + logprintf (LOG_VERBOSE, _("failed: %s.\n"), + host_errstr (h_errno)); + else + logputs (LOG_VERBOSE, _("failed: timed out.\n")); + } + return NULL; } - /* Do all systems have h_addr_list, or is it a newer thing? If - the latter, use address_list_from_single. */ - al = address_list_from_vector (hptr->h_addr_list); + /* Do older systems have h_addr_list? */ + al = address_list_from_ipv4_addresses (hptr->h_addr_list); } -#endif +#endif /* not ENABLE_IPV6 */ /* Print the addresses determined by DNS lookup, but no more than three. */ - if (!silent) + if (!silent && !numeric_address) { int i; int printmax = al->count <= 3 ? al->count : 3; for (i = 0; i < printmax; i++) - { - logprintf (LOG_VERBOSE, "%s", - pretty_print_address (al->addresses + i)); - if (i < printmax - 1) - logputs (LOG_VERBOSE, ", "); - } + { + logputs (LOG_VERBOSE, print_address (al->addresses + i)); + if (i < printmax - 1) + logputs (LOG_VERBOSE, ", "); + } if (printmax != al->count) - logputs (LOG_VERBOSE, ", ..."); + logputs (LOG_VERBOSE, ", ..."); logputs (LOG_VERBOSE, "\n"); } /* Cache the lookup information. */ - if (opt.dns_cache) - cache_host_lookup (host, al); + if (use_cache) + cache_store (host, al); return al; } /* Determine whether a URL is acceptable to be followed, according to a list of domains to accept. */ -int +bool accept_domain (struct url *u) { assert (u->host != NULL); if (opt.domains) { if (!sufmatch ((const char **)opt.domains, u->host)) - return 0; + return false; } if (opt.exclude_domains) { if (sufmatch ((const char **)opt.exclude_domains, u->host)) - return 0; + return false; } - return 1; + return true; } /* Check whether WHAT is matched in LIST, each element of LIST being a @@ -738,7 +866,7 @@ accept_domain (struct url *u) match_backwards() in utils.c). If an element of LIST matched, 1 is returned, 0 otherwise. */ -int +bool sufmatch (const char **list, const char *what) { int i, j, k, lw; @@ -747,43 +875,13 @@ sufmatch (const char **list, const char *what) for (i = 0; list[i]; i++) { for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--) - if (TOLOWER (list[i][j]) != TOLOWER (what[k])) - break; + if (c_tolower (list[i][j]) != c_tolower (what[k])) + break; /* The domain must be first to reach to beginning. */ if (j == -1) - return 1; + return true; } - return 0; -} - -/* Print error messages for host errors. */ -char * -herrmsg (int error) -{ - /* Can't use switch since some constants are equal (at least on my - system), and the compiler signals "duplicate case value". */ - if (error == HOST_NOT_FOUND - || error == NO_RECOVERY - || error == NO_DATA - || error == NO_ADDRESS - || error == TRY_AGAIN) - return _("Host not found"); - else - return _("Unknown error"); -} - -static int -host_cleanup_mapper (void *key, void *value, void *arg_ignored) -{ - struct address_list *al; - - xfree (key); /* host */ - - al = (struct address_list *)value; - assert (al->refcount == 1); - address_list_delete (al); - - return 0; + return false; } void @@ -791,7 +889,17 @@ host_cleanup (void) { if (host_name_addresses_map) { - hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (host_name_addresses_map, &iter); + hash_table_iter_next (&iter); + ) + { + char *host = iter.key; + struct address_list *al = iter.value; + xfree (host); + assert (al->refcount == 1); + address_list_delete (al); + } hash_table_destroy (host_name_addresses_map); host_name_addresses_map = NULL; }