X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhost.c;h=ab490421ae09866c2cf451db83629001719ed60d;hb=5f0a2b3f0846dd4c2f72fc62e7171200d1fd6e06;hp=ddc04445e0fe2d17909769a33b280a552802889b;hpb=1dee527a567d7929dea3c009b2e0797a37a971b6;p=wget diff --git a/src/host.c b/src/host.c index ddc04445..ab490421 100644 --- a/src/host.c +++ b/src/host.c @@ -6,7 +6,7 @@ This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. + (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,10 +15,24 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include +#ifndef WINDOWS +#include +#endif + #include #include #ifdef HAVE_STRING_H @@ -31,6 +45,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef WINDOWS # include +# define SET_H_ERRNO(err) WSASetLastError (err) #else # include # include @@ -38,6 +53,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ # include # endif # include +# define SET_H_ERRNO(err) ((void)(h_errno = (err))) #endif /* WINDOWS */ #ifndef NO_ADDRESS @@ -65,8 +81,11 @@ extern int h_errno; # endif #endif -/* An IPv4 address is simply a 4-byte quantity. */ -typedef unsigned char ipv4_address[4]; +#ifdef ENABLE_IPV6 +int ip_default_family = AF_UNSPEC; +#else +int ip_default_family = AF_INET; +#endif /* Mapping between known hosts and to lists of their addresses. */ @@ -77,44 +96,97 @@ static struct hash_table *host_name_addresses_map; struct address_list { int count; /* number of adrresses */ - ipv4_address *addresses; /* pointer to the string of addresses */ + ip_address *addresses; /* pointer to the string of addresses */ - int faulty; /* number of addresses known not to - work. */ - int refcount; /* so we know whether to free it or - not. */ + int faulty; /* number of addresses known not to work. */ + int from_cache; /* whether this entry was pulled from + cache or freshly looked up. */ + + int refcount; /* reference count; when it drops to + 0, the entry is freed. */ }; /* Get the bounds of the address list. */ void -address_list_get_bounds (struct address_list *al, int *start, int *end) +address_list_get_bounds (const struct address_list *al, int *start, int *end) { *start = al->faulty; *end = al->count; } -/* Copy address number INDEX to IP_STORE. */ +/* Return whether this address list entry has been obtained from the + cache. */ -void -address_list_copy_one (struct address_list *al, int index, - unsigned char *ip_store) +int +address_list_cached_p (const struct address_list *al) { - assert (index >= al->faulty && index < al->count); - memcpy (ip_store, al->addresses + index, sizeof (ipv4_address)); + return al->from_cache; +} + +/* Return a pointer to the address at position POS. */ + +const ip_address * +address_list_address_at (const struct address_list *al, int pos) +{ + assert (pos >= al->faulty && pos < al->count); + return al->addresses + pos; } /* Check whether two address lists have all their IPs in common. */ int -address_list_match_all (struct address_list *al1, struct address_list *al2) +address_list_match_all (const struct address_list *al1, + const struct address_list *al2) { +#ifdef ENABLE_IPV6 + int i; +#endif if (al1 == al2) return 1; if (al1->count != al2->count) return 0; + + /* For the comparison to be complete, we'd need to sort the IP + addresses first. But that's not necessary because this is only + used as an optimization. */ + +#ifndef ENABLE_IPV6 + /* In the non-IPv6 case, there is only one address type, so we can + compare the whole array with memcmp. */ return 0 == memcmp (al1->addresses, al2->addresses, - al1->count * sizeof (ipv4_address)); + al1->count * sizeof (ip_address)); +#else /* ENABLE_IPV6 */ + for (i = 0; i < al1->count; ++i) + { + const ip_address *ip1 = &al1->addresses[i]; + const ip_address *ip2 = &al2->addresses[i]; + + if (ip1->type != ip2->type) + return 0; + + switch (ip1->type) + { + case IPV4_ADDRESS: + if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr + != ADDRESS_IPV4_IN_ADDR (ip2).s_addr) + return 0; + break; + case IPV6_ADDRESS: +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2)) + return 0; +#endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */ + if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1), + &ADDRESS_IPV6_IN6_ADDR (ip2))) + return 0; + break; + default: + abort (); + } + } + return 1; +#endif /* ENABLE_IPV6 */ } /* Mark the INDEXth element of AL as faulty, so that the next time @@ -137,44 +209,91 @@ address_list_set_faulty (struct address_list *al, int index) al->faulty = 0; } -/* Create an address_list out of a NULL-terminated list of addresses, - as returned by gethostbyname. */ - +#ifdef ENABLE_IPV6 +/** + * address_list_from_addrinfo + * + * This function transform an addrinfo links list in and address_list. + * + * Input: + * addrinfo* Linked list of addrinfo + * + * Output: + * address_list* New allocated address_list + */ static struct address_list * -address_list_new (char **h_addr_list) +address_list_from_addrinfo (const struct addrinfo *ai) { - int count = 0, i; + struct address_list *al; + const struct addrinfo *ptr; + int cnt; + ip_address *ip; + + cnt = 0; + for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next) + if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6) + ++cnt; + if (cnt == 0) + return NULL; + + al = xnew0 (struct address_list); + al->addresses = xnew_array (ip_address, cnt); + al->count = cnt; + al->refcount = 1; + + ip = al->addresses; + for (ptr = ai; ptr != NULL; ptr = ptr->ai_next) + if (ptr->ai_family == AF_INET6) + { + const struct sockaddr_in6 *sin6 = + (const struct sockaddr_in6 *)ptr->ai_addr; + ip->type = IPV6_ADDRESS; + ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr; +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id; +#endif + ++ip; + } + else if (ptr->ai_family == AF_INET) + { + const struct sockaddr_in *sin = + (const struct sockaddr_in *)ptr->ai_addr; + ip->type = IPV4_ADDRESS; + ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr; + ++ip; + } + assert (ip - al->addresses == cnt); + return al; +} +#else +/* Create an address_list from a NULL-terminated vector of IPv4 + addresses. This kind of vector is returned by gethostbyname. */ - struct address_list *al = xmalloc (sizeof (struct address_list)); +static struct address_list * +address_list_from_ipv4_addresses (char **h_addr_list) +{ + int count, i; + struct address_list *al = xnew0 (struct address_list); + count = 0; while (h_addr_list[count]) ++count; assert (count > 0); - al->count = count; - al->faulty = 0; - al->addresses = xmalloc (count * sizeof (ipv4_address)); - al->refcount = 1; - for (i = 0; i < count; i++) - memcpy (al->addresses + i, h_addr_list[i], sizeof (ipv4_address)); + al->addresses = xnew_array (ip_address, count); + al->count = count; + al->refcount = 1; - return al; -} - -/* Like address_list_new, but initialized with only one address. */ - -static struct address_list * -address_list_new_one (const char *addr) -{ - struct address_list *al = xmalloc (sizeof (struct address_list)); - al->count = 1; - al->faulty = 0; - al->addresses = xmalloc (sizeof (ipv4_address)); - al->refcount = 1; - memcpy (al->addresses, addr, sizeof (ipv4_address)); + for (i = 0; i < count; i++) + { + ip_address *ip = &al->addresses[i]; + ip->type = IPV4_ADDRESS; + memcpy (ADDRESS_IPV4_DATA (ip), h_addr_list[i], 4); + } return al; } +#endif static void address_list_delete (struct address_list *al) @@ -195,13 +314,118 @@ address_list_release (struct address_list *al) } } -/* The same as inet_ntoa, but without the need for a cast, or for - #including the netinet stuff. */ +/* Versions of gethostbyname and getaddrinfo that support timeout. */ -char * -pretty_print_address (const void *addr) +#ifndef ENABLE_IPV6 + +struct ghbnwt_context { + const char *host_name; + struct hostent *hptr; +}; + +static void +gethostbyname_with_timeout_callback (void *arg) +{ + struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg; + ctx->hptr = gethostbyname (ctx->host_name); +} + +/* Just like gethostbyname, except it times out after TIMEOUT seconds. + In case of timeout, NULL is returned and errno is set to ETIMEDOUT. + The function makes sure that when NULL is returned for reasons + other than timeout, errno is reset. */ + +static struct hostent * +gethostbyname_with_timeout (const char *host_name, double timeout) +{ + struct ghbnwt_context ctx; + ctx.host_name = host_name; + if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx)) + { + SET_H_ERRNO (HOST_NOT_FOUND); + errno = ETIMEDOUT; + return NULL; + } + if (!ctx.hptr) + errno = 0; + return ctx.hptr; +} + +#else /* ENABLE_IPV6 */ + +struct gaiwt_context { + const char *node; + const char *service; + const struct addrinfo *hints; + struct addrinfo **res; + int exit_code; +}; + +static void +getaddrinfo_with_timeout_callback (void *arg) +{ + struct gaiwt_context *ctx = (struct gaiwt_context *)arg; + ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res); +} + +/* Just like getaddrinfo, except it times out after TIMEOUT seconds. + In case of timeout, the EAI_SYSTEM error code is returned and errno + is set to ETIMEDOUT. */ + +static int +getaddrinfo_with_timeout (const char *node, const char *service, + const struct addrinfo *hints, struct addrinfo **res, + double timeout) { - return inet_ntoa (*(struct in_addr *)addr); + struct gaiwt_context ctx; + ctx.node = node; + ctx.service = service; + ctx.hints = hints; + ctx.res = res; + + if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx)) + { + errno = ETIMEDOUT; + return EAI_SYSTEM; + } + return ctx.exit_code; +} + +#endif /* ENABLE_IPV6 */ + +/* Pretty-print ADDR. When compiled without IPv6, this is the same as + inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4 + address. */ + +const char * +pretty_print_address (const ip_address *addr) +{ + switch (addr->type) + { + case IPV4_ADDRESS: + return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr)); +#ifdef ENABLE_IPV6 + case IPV6_ADDRESS: + { + static char buf[128]; + inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf)); +#if 0 +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + { + /* append "%SCOPE_ID" for all ?non-global? addresses */ + char *p = buf + strlen (buf); + *p++ = '%'; + number_to_string (p, ADDRESS_IPV6_SCOPE (addr)); + } +#endif +#endif + buf[sizeof (buf) - 1] = '\0'; + return buf; + } +#endif + } + abort (); + return NULL; } /* Add host name HOST with the address ADDR_TEXT to the cache. @@ -217,7 +441,7 @@ cache_host_lookup (const char *host, struct address_list *al) ++al->refcount; hash_table_put (host_name_addresses_map, xstrdup_lower (host), al); -#ifdef DEBUG +#ifdef ENABLE_DEBUG if (opt.debug) { int i; @@ -229,65 +453,157 @@ cache_host_lookup (const char *host, struct address_list *al) #endif } +void +forget_host_lookup (const char *host) +{ + struct address_list *al = hash_table_get (host_name_addresses_map, host); + if (al) + { + address_list_release (al); + hash_table_remove (host_name_addresses_map, host); + } +} + struct address_list * -lookup_host (const char *host, int silent) +lookup_host (const char *host, int flags) { struct address_list *al = NULL; - unsigned long addr; - struct hostent *hptr; - /* If the address is of the form d.d.d.d, no further lookup is - needed. */ - addr = (unsigned long)inet_addr (host); - if ((int)addr != -1) +#ifdef ENABLE_IPV6 + int err, family; + struct addrinfo hints, *res; + + /* Is this necessary? Should this function be changed to accept a + FAMILY argument? */ + if (flags & LH_IPV4_ONLY) + family = AF_INET; + else if (flags & LH_IPV6_ONLY) + family = AF_INET6; + else + family = ip_default_family; +#endif + + /* First, try to check whether the address is already a numeric + address, in which case we don't need to cache it or bother with + setting up timeouts. Plus, if memory serves me right, Ultrix's + gethostbyname can't handle numeric addresses (!). + + Where getaddrinfo is available, we do it using the AI_NUMERICHOST + flag. Without IPv6, we use inet_addr succeeds. */ + +#ifdef ENABLE_IPV6 + xzero (hints); + hints.ai_family = family; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_NUMERICHOST; + if (flags & LH_PASSIVE) + hints.ai_flags = AI_PASSIVE; + + /* no need to call getaddrinfo_with_timeout here, as we're not + * relying on the DNS, but we're only doing an address translation + * from presentation (ASCII) to network format */ + err = getaddrinfo (host, NULL, &hints, &res); + if (err == 0 && res != NULL) { - /* ADDR is defined to be in network byte order, which is what - this returns, so we can just copy it to STORE_IP. However, - on big endian 64-bit architectures the value will be stored - in the *last*, not first four bytes. OFFSET makes sure that - we copy the correct four bytes. */ - int offset; -#ifdef WORDS_BIGENDIAN - offset = sizeof (unsigned long) - sizeof (ipv4_address); + al = address_list_from_addrinfo (res); + freeaddrinfo (res); + return al; + } #else - offset = 0; + { + uint32_t addr_ipv4 = (uint32_t)inet_addr (host); + if (addr_ipv4 != (uint32_t) -1) + { + /* The return value of inet_addr is in network byte order, so + we can just copy it to IP. */ + char **vec[2]; + vec[0] = (char *)&addr_ipv4; + vec[1] = NULL; + return address_list_from_ipv4_addresses (vec); + } + } #endif - return address_list_new_one ((char *)&addr + offset); - } - /* By now we know that the host name we got is not of the form - d.d.d.d. Try to find it in our cache of host names. */ - if (host_name_addresses_map) - al = hash_table_get (host_name_addresses_map, host); + /* Then, try to find the host in the cache. */ - if (al) + if (host_name_addresses_map) { - DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al)); - ++al->refcount; - return al; + al = hash_table_get (host_name_addresses_map, host); + if (al) + { + DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al)); + ++al->refcount; + al->from_cache = 1; + return al; + } } - if (!silent) + if (!(flags & LH_SILENT)) logprintf (LOG_VERBOSE, _("Resolving %s... "), host); - /* Look up the host using gethostbyname(). */ - hptr = gethostbyname (host); - if (!hptr) + /* Host name lookup goes on below. */ + +#ifdef ENABLE_IPV6 + { + xzero (hints); + hints.ai_family = family; + hints.ai_socktype = SOCK_STREAM; + if (flags & LH_PASSIVE) + hints.ai_flags = AI_PASSIVE; + + err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout); + + if (err != 0 || res == NULL) + { + if (!(flags & LH_SILENT)) + logprintf (LOG_VERBOSE, _("failed: %s.\n"), + err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno)); + return NULL; + } + al = address_list_from_addrinfo (res); + freeaddrinfo (res); + } +#else + { + struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout); + if (!hptr) + { + if (!(flags & LH_SILENT)) + { + if (errno != ETIMEDOUT) + logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno)); + else + logputs (LOG_VERBOSE, _("failed: timed out.\n")); + } + return NULL; + } + assert (hptr->h_length == 4); + /* Do older systems have h_addr_list? */ + al = address_list_from_ipv4_addresses (hptr->h_addr_list); + } +#endif + + /* Print the addresses determined by DNS lookup, but no more than + three. */ + if (!(flags & LH_SILENT)) { - if (!silent) - logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno)); - return NULL; + int i; + int printmax = al->count <= 3 ? al->count : 3; + for (i = 0; i < printmax; i++) + { + logprintf (LOG_VERBOSE, "%s", + pretty_print_address (al->addresses + i)); + if (i < printmax - 1) + logputs (LOG_VERBOSE, ", "); + } + if (printmax != al->count) + logputs (LOG_VERBOSE, ", ..."); + logputs (LOG_VERBOSE, "\n"); } - if (!silent) - logprintf (LOG_VERBOSE, _("done.\n")); - - /* Do all systems have h_addr_list, or is it a newer thing? If the - latter, use address_list_new_one. */ - al = address_list_new (hptr->h_addr_list); - /* Cache the lookup information. */ - cache_host_lookup (host, al); + if (opt.dns_cache) + cache_host_lookup (host, al); return al; }