X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhost.c;h=11de5944352d91d6b805b8bc00ad832ee8097ca3;hp=6274d0ec8d4e917fbdd1bdf9c7d473fcf210cd24;hb=4d7c5e087b2bc82c9f503dff003916d1047903ce;hpb=8e330fdba016c5e521ea8e9c11e6df011c8666b1 diff --git a/src/host.c b/src/host.c index 6274d0ec..11de5944 100644 --- a/src/host.c +++ b/src/host.c @@ -1,11 +1,11 @@ /* Host name resolution and matching. - Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1996-2006 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget. If not, see . In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -29,24 +28,12 @@ so, delete this exception statement from your version. */ #include -#ifndef WINDOWS -#include -#endif - #include #include -#ifdef HAVE_STRING_H -# include -#else -# include -#endif +#include #include -#include -#ifdef WINDOWS -# include -# define SET_H_ERRNO(err) WSASetLastError (err) -#else +#ifndef WINDOWS # include # include # ifndef __BEOS__ @@ -54,12 +41,10 @@ so, delete this exception statement from your version. */ # endif # include # define SET_H_ERRNO(err) ((void)(h_errno = (err))) +#else /* WINDOWS */ +# define SET_H_ERRNO(err) WSASetLastError (err) #endif /* WINDOWS */ -#ifndef NO_ADDRESS -# define NO_ADDRESS NO_DATA -#endif - #include #include "wget.h" @@ -68,14 +53,8 @@ so, delete this exception statement from your version. */ #include "url.h" #include "hash.h" -#ifndef errno -extern int errno; -#endif - -#ifndef h_errno -# ifndef __CYGWIN__ -extern int h_errno; -# endif +#ifndef NO_ADDRESS +# define NO_ADDRESS NO_DATA #endif /* Lists of IP addresses that result from running DNS queries. See @@ -86,7 +65,7 @@ struct address_list { ip_address *addresses; /* pointer to the string of addresses */ int faulty; /* number of addresses known not to work. */ - int connected; /* whether we were able to connect to + bool connected; /* whether we were able to connect to one of the addresses in the list, at least once. */ @@ -112,43 +91,39 @@ address_list_address_at (const struct address_list *al, int pos) return al->addresses + pos; } -/* Return 1 if IP is one of the addresses in AL. */ +/* Return true if AL contains IP, false otherwise. */ -int -address_list_find (const struct address_list *al, const ip_address *ip) +bool +address_list_contains (const struct address_list *al, const ip_address *ip) { int i; - switch (ip->type) + switch (ip->family) { - case IPV4_ADDRESS: + case AF_INET: for (i = 0; i < al->count; i++) { ip_address *cur = al->addresses + i; - if (cur->type == IPV4_ADDRESS - && (ADDRESS_IPV4_IN_ADDR (cur).s_addr - == - ADDRESS_IPV4_IN_ADDR (ip).s_addr)) - return 1; + if (cur->family == AF_INET + && (cur->data.d4.s_addr == ip->data.d4.s_addr)) + return true; } - return 0; + return false; #ifdef ENABLE_IPV6 - case IPV6_ADDRESS: + case AF_INET6: for (i = 0; i < al->count; i++) { ip_address *cur = al->addresses + i; - if (cur->type == IPV6_ADDRESS + if (cur->family == AF_INET6 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID - && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip) + && cur->ipv6_scope == ip->ipv6_scope #endif - && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur), - &ADDRESS_IPV6_IN6_ADDR (ip))) - return 1; + && IN6_ARE_ADDR_EQUAL (&cur->data.d6, &ip->data.d6)) + return true; } - return 0; + return false; #endif /* ENABLE_IPV6 */ default: abort (); - return 1; } } @@ -178,12 +153,12 @@ address_list_set_faulty (struct address_list *al, int index) void address_list_set_connected (struct address_list *al) { - al->connected = 1; + al->connected = true; } /* Return the value of the "connected" flag. */ -int +bool address_list_connected_p (const struct address_list *al) { return al->connected; @@ -210,9 +185,9 @@ address_list_from_addrinfo (const struct addrinfo *ai) return NULL; al = xnew0 (struct address_list); - al->addresses = xnew_array (ip_address, cnt); - al->count = cnt; - al->refcount = 1; + al->addresses = xnew_array (ip_address, cnt); + al->count = cnt; + al->refcount = 1; ip = al->addresses; for (ptr = ai; ptr != NULL; ptr = ptr->ai_next) @@ -220,10 +195,10 @@ address_list_from_addrinfo (const struct addrinfo *ai) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)ptr->ai_addr; - ip->type = IPV6_ADDRESS; - ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr; + ip->family = AF_INET6; + ip->data.d6 = sin6->sin6_addr; #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID - ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id; + ip->ipv6_scope = sin6->sin6_scope_id; #endif ++ip; } @@ -231,14 +206,40 @@ address_list_from_addrinfo (const struct addrinfo *ai) { const struct sockaddr_in *sin = (const struct sockaddr_in *)ptr->ai_addr; - ip->type = IPV4_ADDRESS; - ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr; + ip->family = AF_INET; + ip->data.d4 = sin->sin_addr; ++ip; } assert (ip - al->addresses == cnt); return al; } +#define IS_IPV4(addr) (((const ip_address *) addr)->family == AF_INET) + +/* Compare two IP addresses by family, giving preference to the IPv4 + address (sorting it first). In other words, return -1 if ADDR1 is + IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and + 0 otherwise. + + This is intended to be used as the comparator arg to a qsort-like + sorting function, which is why it accepts generic pointers. */ + +static int +cmp_prefer_ipv4 (const void *addr1, const void *addr2) +{ + return !IS_IPV4 (addr1) - !IS_IPV4 (addr2); +} + +#define IS_IPV6(addr) (((const ip_address *) addr)->family == AF_INET6) + +/* Like the above, but give preference to the IPv6 address. */ + +static int +cmp_prefer_ipv6 (const void *addr1, const void *addr2) +{ + return !IS_IPV6 (addr1) - !IS_IPV6 (addr2); +} + #else /* not ENABLE_IPV6 */ /* Create an address_list from a NULL-terminated vector of IPv4 @@ -262,8 +263,8 @@ address_list_from_ipv4_addresses (char **vec) for (i = 0; i < count; i++) { ip_address *ip = &al->addresses[i]; - ip->type = IPV4_ADDRESS; - memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4); + ip->family = AF_INET; + memcpy (IP_INADDR_DATA (ip), vec[i], 4); } return al; @@ -286,10 +287,11 @@ void address_list_release (struct address_list *al) { --al->refcount; - DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount)); + DEBUGP (("Releasing 0x%0*lx (new refcount %d).\n", PTR_FORMAT (al), + al->refcount)); if (al->refcount <= 0) { - DEBUGP (("Deleting unused %p.\n", al)); + DEBUGP (("Deleting unused 0x%0*lx.\n", PTR_FORMAT (al))); address_list_delete (al); } } @@ -393,39 +395,166 @@ getaddrinfo_with_timeout (const char *node, const char *service, #endif /* ENABLE_IPV6 */ -/* Pretty-print ADDR. When compiled without IPv6, this is the same as - inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4 - address. */ +/* Return a textual representation of ADDR, i.e. the dotted quad for + IPv4 addresses, and the colon-separated list of hex words (with all + zeros omitted, etc.) for IPv6 addresses. */ const char * -pretty_print_address (const ip_address *addr) +print_address (const ip_address *addr) { - switch (addr->type) - { - case IPV4_ADDRESS: - return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr)); #ifdef ENABLE_IPV6 - case IPV6_ADDRESS: - { - static char buf[128]; - inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf)); -#if 0 -#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + static char buf[64]; + if (!inet_ntop (addr->family, IP_INADDR_DATA (addr), buf, sizeof buf)) + snprintf (buf, sizeof buf, "", strerror (errno)); + return buf; +#else + return inet_ntoa (addr->data.d4); +#endif +} + +/* The following two functions were adapted from glibc's + implementation of inet_pton, written by Paul Vixie. */ + +static bool +is_valid_ipv4_address (const char *str, const char *end) +{ + bool saw_digit = false; + int octets = 0; + int val = 0; + + while (str < end) + { + int ch = *str++; + + if (ch >= '0' && ch <= '9') { - /* append "%SCOPE_ID" for all ?non-global? addresses */ - char *p = buf + strlen (buf); - *p++ = '%'; - number_to_string (p, ADDRESS_IPV6_SCOPE (addr)); + val = val * 10 + (ch - '0'); + + if (val > 255) + return false; + if (!saw_digit) + { + if (++octets > 4) + return false; + saw_digit = true; + } } -#endif -#endif - buf[sizeof (buf) - 1] = '\0'; - return buf; - } -#endif + else if (ch == '.' && saw_digit) + { + if (octets == 4) + return false; + val = 0; + saw_digit = false; + } + else + return false; } - abort (); - return NULL; + if (octets < 4) + return false; + + return true; +} + +bool +is_valid_ipv6_address (const char *str, const char *end) +{ + /* Use lower-case for these to avoid clash with system headers. */ + enum { + ns_inaddrsz = 4, + ns_in6addrsz = 16, + ns_int16sz = 2 + }; + + const char *curtok; + int tp; + const char *colonp; + bool saw_xdigit; + unsigned int val; + + tp = 0; + colonp = NULL; + + if (str == end) + return false; + + /* Leading :: requires some special handling. */ + if (*str == ':') + { + ++str; + if (str == end || *str != ':') + return false; + } + + curtok = str; + saw_xdigit = false; + val = 0; + + while (str < end) + { + int ch = *str++; + + /* if ch is a number, add it to val. */ + if (ISXDIGIT (ch)) + { + val <<= 4; + val |= XDIGIT_TO_NUM (ch); + if (val > 0xffff) + return false; + saw_xdigit = true; + continue; + } + + /* if ch is a colon ... */ + if (ch == ':') + { + curtok = str; + if (!saw_xdigit) + { + if (colonp != NULL) + return false; + colonp = str + tp; + continue; + } + else if (str == end) + return false; + if (tp > ns_in6addrsz - ns_int16sz) + return false; + tp += ns_int16sz; + saw_xdigit = false; + val = 0; + continue; + } + + /* if ch is a dot ... */ + if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz) + && is_valid_ipv4_address (curtok, end) == 1) + { + tp += ns_inaddrsz; + saw_xdigit = false; + break; + } + + return false; + } + + if (saw_xdigit) + { + if (tp > ns_in6addrsz - ns_int16sz) + return false; + tp += ns_int16sz; + } + + if (colonp != NULL) + { + if (tp == ns_in6addrsz) + return false; + tp = ns_in6addrsz; + } + + if (tp != ns_in6addrsz) + return false; + + return true; } /* Simple host cache, used by lookup_host to speed up resolving. The @@ -468,16 +597,14 @@ cache_store (const char *host, struct address_list *al) ++al->refcount; hash_table_put (host_name_addresses_map, xstrdup_lower (host), al); -#ifdef ENABLE_DEBUG - if (opt.debug) + IF_DEBUG { int i; debug_logprintf ("Caching %s =>", host); for (i = 0; i < al->count; i++) - debug_logprintf (" %s", pretty_print_address (al->addresses + i)); + debug_logprintf (" %s", print_address (al->addresses + i)); debug_logprintf ("\n"); } -#endif } /* Remove HOST from the DNS cache. Does nothing is HOST is not in @@ -497,14 +624,19 @@ cache_remove (const char *host) } } -/* Look up HOST in DNS and return a list of IP addresses. The - addresses in the list are in the same order in which - gethostbyname/getaddrinfo returned them. +/* Look up HOST in DNS and return a list of IP addresses. This function caches its result so that, if the same host is passed - the second time, the addresses are returned without DNS lookup. If - you want to force lookup, call forget_host_lookup() prior to this - function, or set opt.dns_cache to 0 to globally disable caching. + the second time, the addresses are returned without DNS lookup. + (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to + globally disable caching.) + + The order of the returned addresses is affected by the setting of + opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are + placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed + at the beginning; otherwise, the order is left intact. The + relative order of addresses with the same family is left + undisturbed in either case. FLAGS can be a combination of: LH_SILENT - don't print the "resolving ... done" messages. @@ -517,15 +649,17 @@ cache_remove (const char *host) struct address_list * lookup_host (const char *host, int flags) { - struct address_list *al = NULL; - int silent = flags & LH_SILENT; - int use_cache; + struct address_list *al; + bool silent = !!(flags & LH_SILENT); + bool use_cache; + bool numeric_address = false; + double timeout = opt.dns_timeout; #ifndef ENABLE_IPV6 /* If we're not using getaddrinfo, first check if HOST specifies a - numeric IPv4 address. gethostbyname is not required to accept - dotted-decimal IPv4 addresses, and some implementations (e.g. the - Ultrix one and possibly Winsock) indeed don't. */ + numeric IPv4 address. Some implementations of gethostbyname + (e.g. the Ultrix one and possibly Winsock) don't accept + dotted-decimal IPv4 addresses. */ { uint32_t addr_ipv4 = (uint32_t)inet_addr (host); if (addr_ipv4 != (uint32_t) -1) @@ -538,14 +672,25 @@ lookup_host (const char *host, int flags) return address_list_from_ipv4_addresses (vec); } } +#else /* ENABLE_IPV6 */ + /* If we're using getaddrinfo, at least check whether the address is + already numeric, in which case there is no need to print the + "Resolving..." output. (This comes at no additional cost since + the is_valid_ipv*_address are already required for + url_parse.) */ + { + const char *end = host + strlen (host); + if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end)) + numeric_address = true; + } #endif /* Cache is normally on, but can be turned off with --no-dns-cache. Don't cache passive lookups under IPv6. */ use_cache = opt.dns_cache; #ifdef ENABLE_IPV6 - if (flags & LH_BIND) - use_cache = 0; + if ((flags & LH_BIND) || numeric_address) + use_cache = false; #endif /* Try to find the host in the cache so we don't need to talk to the @@ -565,8 +710,8 @@ lookup_host (const char *host, int flags) /* No luck with the cache; resolve HOST. */ - if (!silent) - logprintf (LOG_VERBOSE, _("Resolving %s... "), host); + if (!silent && !numeric_address) + logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host)); #ifdef ENABLE_IPV6 { @@ -575,20 +720,32 @@ lookup_host (const char *host, int flags) xzero (hints); hints.ai_socktype = SOCK_STREAM; - hints.ai_family = AF_UNSPEC; - if (opt.ipv4_only && !opt.ipv6_only) + if (opt.ipv4_only) hints.ai_family = AF_INET; - else if (opt.ipv6_only && !opt.ipv4_only) + else if (opt.ipv6_only) hints.ai_family = AF_INET6; + else + /* We tried using AI_ADDRCONFIG, but removed it because: it + misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and + it's unneeded since we sort the addresses anyway. */ + hints.ai_family = AF_UNSPEC; -#ifdef HAVE_GETADDRINFO_AI_ADDRCONFIG - /* Use AI_ADDRCONFIG where available. See init.c:defaults(). */ - hints.ai_flags |= AI_ADDRCONFIG; -#endif if (flags & LH_BIND) hints.ai_flags |= AI_PASSIVE; - err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout); +#ifdef AI_NUMERICHOST + if (numeric_address) + { + /* Where available, the AI_NUMERICHOST hint can prevent costly + access to DNS servers. */ + hints.ai_flags |= AI_NUMERICHOST; + timeout = 0; /* no timeout needed when "resolving" + numeric hosts -- avoid setting up + signal handlers and such. */ + } +#endif + + err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout); if (err != 0 || res == NULL) { if (!silent) @@ -604,10 +761,18 @@ lookup_host (const char *host, int flags) _("failed: No IPv4/IPv6 addresses for host.\n")); return NULL; } + + /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per + --prefer-family) come first. Sorting is stable so the order of + the addresses with the same family is undisturbed. */ + if (al->count > 1 && opt.prefer_family != prefer_none) + stable_sort (al->addresses, al->count, sizeof (ip_address), + opt.prefer_family == prefer_ipv4 + ? cmp_prefer_ipv4 : cmp_prefer_ipv6); } -#else +#else /* not ENABLE_IPV6 */ { - struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout); + struct hostent *hptr = gethostbyname_with_timeout (host, timeout); if (!hptr) { if (!silent) @@ -623,18 +788,17 @@ lookup_host (const char *host, int flags) /* Do older systems have h_addr_list? */ al = address_list_from_ipv4_addresses (hptr->h_addr_list); } -#endif +#endif /* not ENABLE_IPV6 */ /* Print the addresses determined by DNS lookup, but no more than three. */ - if (!silent) + if (!silent && !numeric_address) { int i; int printmax = al->count <= 3 ? al->count : 3; for (i = 0; i < printmax; i++) { - logprintf (LOG_VERBOSE, "%s", - pretty_print_address (al->addresses + i)); + logputs (LOG_VERBOSE, print_address (al->addresses + i)); if (i < printmax - 1) logputs (LOG_VERBOSE, ", "); } @@ -652,21 +816,21 @@ lookup_host (const char *host, int flags) /* Determine whether a URL is acceptable to be followed, according to a list of domains to accept. */ -int +bool accept_domain (struct url *u) { assert (u->host != NULL); if (opt.domains) { if (!sufmatch ((const char **)opt.domains, u->host)) - return 0; + return false; } if (opt.exclude_domains) { if (sufmatch ((const char **)opt.exclude_domains, u->host)) - return 0; + return false; } - return 1; + return true; } /* Check whether WHAT is matched in LIST, each element of LIST being a @@ -674,7 +838,7 @@ accept_domain (struct url *u) match_backwards() in utils.c). If an element of LIST matched, 1 is returned, 0 otherwise. */ -int +bool sufmatch (const char **list, const char *what) { int i, j, k, lw; @@ -687,23 +851,9 @@ sufmatch (const char **list, const char *what) break; /* The domain must be first to reach to beginning. */ if (j == -1) - return 1; + return true; } - return 0; -} - -static int -host_cleanup_mapper (void *key, void *value, void *arg_ignored) -{ - struct address_list *al; - - xfree (key); /* host */ - - al = (struct address_list *)value; - assert (al->refcount == 1); - address_list_delete (al); - - return 0; + return false; } void @@ -711,7 +861,17 @@ host_cleanup (void) { if (host_name_addresses_map) { - hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (host_name_addresses_map, &iter); + hash_table_iter_next (&iter); + ) + { + char *host = iter.key; + struct address_list *al = iter.value; + xfree (host); + assert (al->refcount == 1); + address_list_delete (al); + } hash_table_destroy (host_name_addresses_map); host_name_addresses_map = NULL; }