X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhost.c;h=11de5944352d91d6b805b8bc00ad832ee8097ca3;hp=eeb4940d6190112797541b4c357b5ecfcf3668c6;hb=4d7c5e087b2bc82c9f503dff003916d1047903ce;hpb=b0b1c815c15e49c9172f59428810713097a65e37 diff --git a/src/host.c b/src/host.c index eeb4940d..11de5944 100644 --- a/src/host.c +++ b/src/host.c @@ -1,47 +1,50 @@ -/* Dealing with host names. - Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc. +/* Host name resolution and matching. + Copyright (C) 1996-2006 Free Software Foundation, Inc. -This file is part of Wget. +This file is part of GNU Wget. -This program is free software; you can redistribute it and/or modify +GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. +the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. -This program is distributed in the hope that it will be useful, +GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +along with Wget. If not, see . + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include #include #include -#include -#ifdef HAVE_STRING_H -# include -#else -# include -#endif +#include #include -#include -#ifdef WINDOWS -# include -#else +#ifndef WINDOWS # include # include -# include +# ifndef __BEOS__ +# include +# endif # include +# define SET_H_ERRNO(err) ((void)(h_errno = (err))) +#else /* WINDOWS */ +# define SET_H_ERRNO(err) WSASetLastError (err) #endif /* WINDOWS */ -#ifdef HAVE_SYS_UTSNAME_H -# include -#endif #include #include "wget.h" @@ -50,294 +53,784 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "url.h" #include "hash.h" -#ifndef errno -extern int errno; +#ifndef NO_ADDRESS +# define NO_ADDRESS NO_DATA #endif -/* Mapping between all known hosts to their addresses (n.n.n.n). */ -struct hash_table *host_name_address_map; +/* Lists of IP addresses that result from running DNS queries. See + lookup_host for details. */ -/* Mapping between all known addresses (n.n.n.n) to their hosts. This - is the inverse of host_name_address_map. These two tables share - the strdup'ed strings. */ -struct hash_table *host_address_name_map; +struct address_list { + int count; /* number of adrresses */ + ip_address *addresses; /* pointer to the string of addresses */ -/* Mapping between auxilliary (slave) and master host names. */ -struct hash_table *host_slave_master_map; + int faulty; /* number of addresses known not to work. */ + bool connected; /* whether we were able to connect to + one of the addresses in the list, + at least once. */ -/* Utility function: like xstrdup(), but also lowercases S. */ + int refcount; /* reference count; when it drops to + 0, the entry is freed. */ +}; -static char * -xstrdup_lower (const char *s) +/* Get the bounds of the address list. */ + +void +address_list_get_bounds (const struct address_list *al, int *start, int *end) { - char *copy = xstrdup (s); - char *p = copy; - for (; *p; p++) - *p = TOLOWER (*p); - return copy; + *start = al->faulty; + *end = al->count; } -/* The same as gethostbyname, but supports internet addresses of the - form `N.N.N.N'. On some systems gethostbyname() knows how to do - this automatically. */ -struct hostent * -ngethostbyname (const char *name) +/* Return a pointer to the address at position POS. */ + +const ip_address * +address_list_address_at (const struct address_list *al, int pos) { - struct hostent *hp; - unsigned long addr; + assert (pos >= al->faulty && pos < al->count); + return al->addresses + pos; +} - addr = (unsigned long)inet_addr (name); - if ((int)addr != -1) - hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET); - else - hp = gethostbyname (name); - return hp; +/* Return true if AL contains IP, false otherwise. */ + +bool +address_list_contains (const struct address_list *al, const ip_address *ip) +{ + int i; + switch (ip->family) + { + case AF_INET: + for (i = 0; i < al->count; i++) + { + ip_address *cur = al->addresses + i; + if (cur->family == AF_INET + && (cur->data.d4.s_addr == ip->data.d4.s_addr)) + return true; + } + return false; +#ifdef ENABLE_IPV6 + case AF_INET6: + for (i = 0; i < al->count; i++) + { + ip_address *cur = al->addresses + i; + if (cur->family == AF_INET6 +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + && cur->ipv6_scope == ip->ipv6_scope +#endif + && IN6_ARE_ADDR_EQUAL (&cur->data.d6, &ip->data.d6)) + return true; + } + return false; +#endif /* ENABLE_IPV6 */ + default: + abort (); + } +} + +/* Mark the INDEXth element of AL as faulty, so that the next time + this address list is used, the faulty element will be skipped. */ + +void +address_list_set_faulty (struct address_list *al, int index) +{ + /* We assume that the address list is traversed in order, so that a + "faulty" attempt is always preceded with all-faulty addresses, + and this is how Wget uses it. */ + assert (index == al->faulty); + + ++al->faulty; + if (al->faulty >= al->count) + /* All addresses have been proven faulty. Since there's not much + sense in returning the user an empty address list the next + time, we'll rather make them all clean, so that they can be + retried anew. */ + al->faulty = 0; } -/* Add host name HOST with the address ADDR_TEXT to the cache. - Normally this means that the (HOST, ADDR_TEXT) pair will be to - host_name_address_map and to host_address_name_map. (It is the - caller's responsibility to make sure that HOST is not already in - host_name_address_map.) +/* Set the "connected" flag to true. This flag used by connect.c to + see if the host perhaps needs to be resolved again. */ - If the ADDR_TEXT has already been seen and belongs to another host, - HOST will be added to host_slave_master_map instead. */ +void +address_list_set_connected (struct address_list *al) +{ + al->connected = true; +} -static void -add_host_to_cache (const char *host, const char *addr_text) +/* Return the value of the "connected" flag. */ + +bool +address_list_connected_p (const struct address_list *al) +{ + return al->connected; +} + +#ifdef ENABLE_IPV6 + +/* Create an address_list from the addresses in the given struct + addrinfo. */ + +static struct address_list * +address_list_from_addrinfo (const struct addrinfo *ai) +{ + struct address_list *al; + const struct addrinfo *ptr; + int cnt; + ip_address *ip; + + cnt = 0; + for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next) + if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6) + ++cnt; + if (cnt == 0) + return NULL; + + al = xnew0 (struct address_list); + al->addresses = xnew_array (ip_address, cnt); + al->count = cnt; + al->refcount = 1; + + ip = al->addresses; + for (ptr = ai; ptr != NULL; ptr = ptr->ai_next) + if (ptr->ai_family == AF_INET6) + { + const struct sockaddr_in6 *sin6 = + (const struct sockaddr_in6 *)ptr->ai_addr; + ip->family = AF_INET6; + ip->data.d6 = sin6->sin6_addr; +#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID + ip->ipv6_scope = sin6->sin6_scope_id; +#endif + ++ip; + } + else if (ptr->ai_family == AF_INET) + { + const struct sockaddr_in *sin = + (const struct sockaddr_in *)ptr->ai_addr; + ip->family = AF_INET; + ip->data.d4 = sin->sin_addr; + ++ip; + } + assert (ip - al->addresses == cnt); + return al; +} + +#define IS_IPV4(addr) (((const ip_address *) addr)->family == AF_INET) + +/* Compare two IP addresses by family, giving preference to the IPv4 + address (sorting it first). In other words, return -1 if ADDR1 is + IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and + 0 otherwise. + + This is intended to be used as the comparator arg to a qsort-like + sorting function, which is why it accepts generic pointers. */ + +static int +cmp_prefer_ipv4 (const void *addr1, const void *addr2) +{ + return !IS_IPV4 (addr1) - !IS_IPV4 (addr2); +} + +#define IS_IPV6(addr) (((const ip_address *) addr)->family == AF_INET6) + +/* Like the above, but give preference to the IPv6 address. */ + +static int +cmp_prefer_ipv6 (const void *addr1, const void *addr2) { - char *canonical_name = hash_table_get (host_address_name_map, addr_text); - if (canonical_name) + return !IS_IPV6 (addr1) - !IS_IPV6 (addr2); +} + +#else /* not ENABLE_IPV6 */ + +/* Create an address_list from a NULL-terminated vector of IPv4 + addresses. This kind of vector is returned by gethostbyname. */ + +static struct address_list * +address_list_from_ipv4_addresses (char **vec) +{ + int count, i; + struct address_list *al = xnew0 (struct address_list); + + count = 0; + while (vec[count]) + ++count; + assert (count > 0); + + al->addresses = xnew_array (ip_address, count); + al->count = count; + al->refcount = 1; + + for (i = 0; i < count; i++) { - DEBUGP (("Mapping %s to %s in host_slave_master_map.\n", - host, canonical_name)); - /* We've already dealt with that host under another name. */ - hash_table_put (host_slave_master_map, - xstrdup_lower (host), - xstrdup_lower (canonical_name)); + ip_address *ip = &al->addresses[i]; + ip->family = AF_INET; + memcpy (IP_INADDR_DATA (ip), vec[i], 4); } - else + + return al; +} + +#endif /* not ENABLE_IPV6 */ + +static void +address_list_delete (struct address_list *al) +{ + xfree (al->addresses); + xfree (al); +} + +/* Mark the address list as being no longer in use. This will reduce + its reference count which will cause the list to be freed when the + count reaches 0. */ + +void +address_list_release (struct address_list *al) +{ + --al->refcount; + DEBUGP (("Releasing 0x%0*lx (new refcount %d).\n", PTR_FORMAT (al), + al->refcount)); + if (al->refcount <= 0) { - /* This is really the first time we're dealing with that host. */ - char *h_copy = xstrdup_lower (host); - char *a_copy = xstrdup (addr_text); - DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy)); - hash_table_put (host_name_address_map, h_copy, a_copy); - hash_table_put (host_address_name_map, a_copy, h_copy); + DEBUGP (("Deleting unused 0x%0*lx.\n", PTR_FORMAT (al))); + address_list_delete (al); } } + +/* Versions of gethostbyname and getaddrinfo that support timeout. */ -/* Store the address of HOSTNAME, internet-style (four octets in - network order), to WHERE. First try to get the address from the - cache; if it is not available, call the DNS functions and update - the cache. +#ifndef ENABLE_IPV6 - Return 1 on successful finding of the hostname, 0 otherwise. */ -int -store_hostaddress (unsigned char *where, const char *hostname) -{ - unsigned long addr; - char *addr_text; - char *canonical_name; +struct ghbnwt_context { + const char *host_name; struct hostent *hptr; - struct in_addr in; - char *inet_s; - - /* If the address is of the form d.d.d.d, there will be no trouble - with it. */ - addr = (unsigned long)inet_addr (hostname); - /* If we have the numeric address, just store it. */ - if ((int)addr != -1) +}; + +static void +gethostbyname_with_timeout_callback (void *arg) +{ + struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg; + ctx->hptr = gethostbyname (ctx->host_name); +} + +/* Just like gethostbyname, except it times out after TIMEOUT seconds. + In case of timeout, NULL is returned and errno is set to ETIMEDOUT. + The function makes sure that when NULL is returned for reasons + other than timeout, errno is reset. */ + +static struct hostent * +gethostbyname_with_timeout (const char *host_name, double timeout) +{ + struct ghbnwt_context ctx; + ctx.host_name = host_name; + if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx)) { - /* ADDR is defined to be in network byte order, meaning the code - works on little and big endian 32-bit architectures without - change. On big endian 64-bit architectures we need to be - careful to copy the correct four bytes. */ - int offset; - have_addr: -#ifdef WORDS_BIGENDIAN - offset = sizeof (unsigned long) - 4; -#else - offset = 0; -#endif - memcpy (where, (char *)&addr + offset, 4); - return 1; + SET_H_ERRNO (HOST_NOT_FOUND); + errno = ETIMEDOUT; + return NULL; } + if (!ctx.hptr) + errno = 0; + return ctx.hptr; +} - /* By now we know that the address is not of the form d.d.d.d. Try - to find it in our cache of host addresses. */ - addr_text = hash_table_get (host_name_address_map, hostname); - if (addr_text) +/* Print error messages for host errors. */ +static char * +host_errstr (int error) +{ + /* Can't use switch since some of these constants can be equal, + which makes the compiler complain about duplicate case + values. */ + if (error == HOST_NOT_FOUND + || error == NO_RECOVERY + || error == NO_DATA + || error == NO_ADDRESS) + return _("Unknown host"); + else if (error == TRY_AGAIN) + /* Message modeled after what gai_strerror returns in similar + circumstances. */ + return _("Temporary failure in name resolution"); + else + return _("Unknown error"); +} + +#else /* ENABLE_IPV6 */ + +struct gaiwt_context { + const char *node; + const char *service; + const struct addrinfo *hints; + struct addrinfo **res; + int exit_code; +}; + +static void +getaddrinfo_with_timeout_callback (void *arg) +{ + struct gaiwt_context *ctx = (struct gaiwt_context *)arg; + ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res); +} + +/* Just like getaddrinfo, except it times out after TIMEOUT seconds. + In case of timeout, the EAI_SYSTEM error code is returned and errno + is set to ETIMEDOUT. */ + +static int +getaddrinfo_with_timeout (const char *node, const char *service, + const struct addrinfo *hints, struct addrinfo **res, + double timeout) +{ + struct gaiwt_context ctx; + ctx.node = node; + ctx.service = service; + ctx.hints = hints; + ctx.res = res; + + if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx)) { - DEBUGP (("Found %s in host_name_address_map: %s\n", - hostname, addr_text)); - addr = (unsigned long)inet_addr (addr_text); - goto have_addr; + errno = ETIMEDOUT; + return EAI_SYSTEM; } + return ctx.exit_code; +} - /* Maybe this host is known to us under another name. If so, we'll - find it in host_slave_master_map, and use the master name to find - its address in host_name_address_map. */ - canonical_name = hash_table_get (host_slave_master_map, hostname); - if (canonical_name) +#endif /* ENABLE_IPV6 */ + +/* Return a textual representation of ADDR, i.e. the dotted quad for + IPv4 addresses, and the colon-separated list of hex words (with all + zeros omitted, etc.) for IPv6 addresses. */ + +const char * +print_address (const ip_address *addr) +{ +#ifdef ENABLE_IPV6 + static char buf[64]; + if (!inet_ntop (addr->family, IP_INADDR_DATA (addr), buf, sizeof buf)) + snprintf (buf, sizeof buf, "", strerror (errno)); + return buf; +#else + return inet_ntoa (addr->data.d4); +#endif +} + +/* The following two functions were adapted from glibc's + implementation of inet_pton, written by Paul Vixie. */ + +static bool +is_valid_ipv4_address (const char *str, const char *end) +{ + bool saw_digit = false; + int octets = 0; + int val = 0; + + while (str < end) { - addr_text = hash_table_get (host_name_address_map, canonical_name); - assert (addr_text != NULL); - DEBUGP (("Found %s as slave of %s -> %s\n", - hostname, canonical_name, addr_text)); - addr = (unsigned long)inet_addr (addr_text); - goto have_addr; - } + int ch = *str++; + + if (ch >= '0' && ch <= '9') + { + val = val * 10 + (ch - '0'); - /* Since all else has failed, let's try gethostbyname(). Note that - we use gethostbyname() rather than ngethostbyname(), because we - already know that the address is not numerical. */ - hptr = gethostbyname (hostname); - if (!hptr) - return 0; - /* Copy the address of the host to socket description. */ - memcpy (where, hptr->h_addr_list[0], hptr->h_length); - assert (hptr->h_length == 4); - - /* Now that we've gone through the truoble of calling - gethostbyname(), we can store this valuable information to the - cache. First, we have to look for it by address to know if it's - already in the cache by another name. */ - /* Originally, we copied to in.s_addr, but it appears to be missing - on some systems. */ - memcpy (&in, *hptr->h_addr_list, sizeof (in)); - inet_s = inet_ntoa (in); - add_host_to_cache (hostname, inet_s); - return 1; + if (val > 255) + return false; + if (!saw_digit) + { + if (++octets > 4) + return false; + saw_digit = true; + } + } + else if (ch == '.' && saw_digit) + { + if (octets == 4) + return false; + val = 0; + saw_digit = false; + } + else + return false; + } + if (octets < 4) + return false; + + return true; } -/* Determine the "real" name of HOST, as perceived by Wget. If HOST - is referenced by more than one name, "real" name is considered to - be the first one encountered in the past. */ -char * -realhost (const char *host) +bool +is_valid_ipv6_address (const char *str, const char *end) { - struct in_addr in; - struct hostent *hptr; - char *master_name; + /* Use lower-case for these to avoid clash with system headers. */ + enum { + ns_inaddrsz = 4, + ns_in6addrsz = 16, + ns_int16sz = 2 + }; + + const char *curtok; + int tp; + const char *colonp; + bool saw_xdigit; + unsigned int val; + + tp = 0; + colonp = NULL; + + if (str == end) + return false; + + /* Leading :: requires some special handling. */ + if (*str == ':') + { + ++str; + if (str == end || *str != ':') + return false; + } - DEBUGP (("Checking for %s in host_name_address_map.\n", host)); - if (hash_table_exists (host_name_address_map, host)) + curtok = str; + saw_xdigit = false; + val = 0; + + while (str < end) { - DEBUGP (("Found; %s was already used, by that name.\n", host)); - return xstrdup_lower (host); + int ch = *str++; + + /* if ch is a number, add it to val. */ + if (ISXDIGIT (ch)) + { + val <<= 4; + val |= XDIGIT_TO_NUM (ch); + if (val > 0xffff) + return false; + saw_xdigit = true; + continue; + } + + /* if ch is a colon ... */ + if (ch == ':') + { + curtok = str; + if (!saw_xdigit) + { + if (colonp != NULL) + return false; + colonp = str + tp; + continue; + } + else if (str == end) + return false; + if (tp > ns_in6addrsz - ns_int16sz) + return false; + tp += ns_int16sz; + saw_xdigit = false; + val = 0; + continue; + } + + /* if ch is a dot ... */ + if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz) + && is_valid_ipv4_address (curtok, end) == 1) + { + tp += ns_inaddrsz; + saw_xdigit = false; + break; + } + + return false; } - DEBUGP (("Checking for %s in host_slave_master_map.\n", host)); - master_name = hash_table_get (host_slave_master_map, host); - if (master_name) + if (saw_xdigit) { - has_master: - DEBUGP (("Found; %s was already used, by the name %s.\n", - host, master_name)); - return xstrdup (master_name); + if (tp > ns_in6addrsz - ns_int16sz) + return false; + tp += ns_int16sz; } - DEBUGP (("First time I hear about %s by that name; looking it up.\n", - host)); - hptr = ngethostbyname (host); - if (hptr) + if (colonp != NULL) { - char *inet_s; - /* Originally, we copied to in.s_addr, but it appears to be - missing on some systems. */ - memcpy (&in, *hptr->h_addr_list, sizeof (in)); - inet_s = inet_ntoa (in); - - add_host_to_cache (host, inet_s); - - /* add_host_to_cache() can establish a slave-master mapping. */ - DEBUGP (("Checking again for %s in host_slave_master_map.\n", host)); - master_name = hash_table_get (host_slave_master_map, host); - if (master_name) - goto has_master; + if (tp == ns_in6addrsz) + return false; + tp = ns_in6addrsz; } - return xstrdup_lower (host); + if (tp != ns_in6addrsz) + return false; + + return true; } + +/* Simple host cache, used by lookup_host to speed up resolving. The + cache doesn't handle TTL because Wget is a fairly short-lived + application. Refreshing is attempted when connect fails, though -- + see connect_to_host. */ + +/* Mapping between known hosts and to lists of their addresses. */ +static struct hash_table *host_name_addresses_map; + + +/* Return the host's resolved addresses from the cache, if + available. */ -/* Compare two hostnames (out of URL-s if the arguments are URL-s), - taking care of aliases. It uses realhost() to determine a unique - hostname for each of two hosts. If simple_check is non-zero, only - strcmp() is used for comparison. */ -int -same_host (const char *u1, const char *u2) +static struct address_list * +cache_query (const char *host) { - const char *s; - char *p1, *p2; - char *real1, *real2; - - /* Skip protocol, if present. */ - u1 += skip_url (u1); - u2 += skip_url (u2); - u1 += skip_proto (u1); - u2 += skip_proto (u2); - - /* Skip username ans password, if present. */ - u1 += skip_uname (u1); - u2 += skip_uname (u2); - - for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++); - p1 = strdupdelim (s, u1); - for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++); - p2 = strdupdelim (s, u2); - DEBUGP (("Comparing hosts %s and %s...\n", p1, p2)); - if (strcasecmp (p1, p2) == 0) + struct address_list *al; + if (!host_name_addresses_map) + return NULL; + al = hash_table_get (host_name_addresses_map, host); + if (al) { - free (p1); - free (p2); - DEBUGP (("They are quite alike.\n")); - return 1; + DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al)); + ++al->refcount; + return al; } - else if (opt.simple_check) + return NULL; +} + +/* Cache the DNS lookup of HOST. Subsequent invocations of + lookup_host will return the cached value. */ + +static void +cache_store (const char *host, struct address_list *al) +{ + if (!host_name_addresses_map) + host_name_addresses_map = make_nocase_string_hash_table (0); + + ++al->refcount; + hash_table_put (host_name_addresses_map, xstrdup_lower (host), al); + + IF_DEBUG { - free (p1); - free (p2); - DEBUGP (("Since checking is simple, I'd say they are not the same.\n")); - return 0; + int i; + debug_logprintf ("Caching %s =>", host); + for (i = 0; i < al->count; i++) + debug_logprintf (" %s", print_address (al->addresses + i)); + debug_logprintf ("\n"); } - real1 = realhost (p1); - real2 = realhost (p2); - free (p1); - free (p2); - if (strcasecmp (real1, real2) == 0) +} + +/* Remove HOST from the DNS cache. Does nothing is HOST is not in + the cache. */ + +static void +cache_remove (const char *host) +{ + struct address_list *al; + if (!host_name_addresses_map) + return; + al = hash_table_get (host_name_addresses_map, host); + if (al) { - DEBUGP (("They are alike, after realhost()->%s.\n", real1)); - free (real1); - free (real2); - return 1; + address_list_release (al); + hash_table_remove (host_name_addresses_map, host); } - else +} + +/* Look up HOST in DNS and return a list of IP addresses. + + This function caches its result so that, if the same host is passed + the second time, the addresses are returned without DNS lookup. + (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to + globally disable caching.) + + The order of the returned addresses is affected by the setting of + opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are + placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed + at the beginning; otherwise, the order is left intact. The + relative order of addresses with the same family is left + undisturbed in either case. + + FLAGS can be a combination of: + LH_SILENT - don't print the "resolving ... done" messages. + LH_BIND - resolve addresses for use with bind, which under + IPv6 means to use AI_PASSIVE flag to getaddrinfo. + Passive lookups are not cached under IPv6. + LH_REFRESH - if HOST is cached, remove the entry from the cache + and resolve it anew. */ + +struct address_list * +lookup_host (const char *host, int flags) +{ + struct address_list *al; + bool silent = !!(flags & LH_SILENT); + bool use_cache; + bool numeric_address = false; + double timeout = opt.dns_timeout; + +#ifndef ENABLE_IPV6 + /* If we're not using getaddrinfo, first check if HOST specifies a + numeric IPv4 address. Some implementations of gethostbyname + (e.g. the Ultrix one and possibly Winsock) don't accept + dotted-decimal IPv4 addresses. */ + { + uint32_t addr_ipv4 = (uint32_t)inet_addr (host); + if (addr_ipv4 != (uint32_t) -1) + { + /* No need to cache host->addr relation, just return the + address. */ + char *vec[2]; + vec[0] = (char *)&addr_ipv4; + vec[1] = NULL; + return address_list_from_ipv4_addresses (vec); + } + } +#else /* ENABLE_IPV6 */ + /* If we're using getaddrinfo, at least check whether the address is + already numeric, in which case there is no need to print the + "Resolving..." output. (This comes at no additional cost since + the is_valid_ipv*_address are already required for + url_parse.) */ + { + const char *end = host + strlen (host); + if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end)) + numeric_address = true; + } +#endif + + /* Cache is normally on, but can be turned off with --no-dns-cache. + Don't cache passive lookups under IPv6. */ + use_cache = opt.dns_cache; +#ifdef ENABLE_IPV6 + if ((flags & LH_BIND) || numeric_address) + use_cache = false; +#endif + + /* Try to find the host in the cache so we don't need to talk to the + resolver. If LH_REFRESH is requested, remove HOST from the cache + instead. */ + if (use_cache) { - DEBUGP (("They are not the same (%s, %s).\n", real1, real2)); - free (real1); - free (real2); - return 0; + if (!(flags & LH_REFRESH)) + { + al = cache_query (host); + if (al) + return al; + } + else + cache_remove (host); } -} + /* No luck with the cache; resolve HOST. */ + + if (!silent && !numeric_address) + logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host)); + +#ifdef ENABLE_IPV6 + { + int err; + struct addrinfo hints, *res; + + xzero (hints); + hints.ai_socktype = SOCK_STREAM; + if (opt.ipv4_only) + hints.ai_family = AF_INET; + else if (opt.ipv6_only) + hints.ai_family = AF_INET6; + else + /* We tried using AI_ADDRCONFIG, but removed it because: it + misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and + it's unneeded since we sort the addresses anyway. */ + hints.ai_family = AF_UNSPEC; + + if (flags & LH_BIND) + hints.ai_flags |= AI_PASSIVE; + +#ifdef AI_NUMERICHOST + if (numeric_address) + { + /* Where available, the AI_NUMERICHOST hint can prevent costly + access to DNS servers. */ + hints.ai_flags |= AI_NUMERICHOST; + timeout = 0; /* no timeout needed when "resolving" + numeric hosts -- avoid setting up + signal handlers and such. */ + } +#endif + + err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout); + if (err != 0 || res == NULL) + { + if (!silent) + logprintf (LOG_VERBOSE, _("failed: %s.\n"), + err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno)); + return NULL; + } + al = address_list_from_addrinfo (res); + freeaddrinfo (res); + if (!al) + { + logprintf (LOG_VERBOSE, + _("failed: No IPv4/IPv6 addresses for host.\n")); + return NULL; + } + + /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per + --prefer-family) come first. Sorting is stable so the order of + the addresses with the same family is undisturbed. */ + if (al->count > 1 && opt.prefer_family != prefer_none) + stable_sort (al->addresses, al->count, sizeof (ip_address), + opt.prefer_family == prefer_ipv4 + ? cmp_prefer_ipv4 : cmp_prefer_ipv6); + } +#else /* not ENABLE_IPV6 */ + { + struct hostent *hptr = gethostbyname_with_timeout (host, timeout); + if (!hptr) + { + if (!silent) + { + if (errno != ETIMEDOUT) + logprintf (LOG_VERBOSE, _("failed: %s.\n"), + host_errstr (h_errno)); + else + logputs (LOG_VERBOSE, _("failed: timed out.\n")); + } + return NULL; + } + /* Do older systems have h_addr_list? */ + al = address_list_from_ipv4_addresses (hptr->h_addr_list); + } +#endif /* not ENABLE_IPV6 */ + + /* Print the addresses determined by DNS lookup, but no more than + three. */ + if (!silent && !numeric_address) + { + int i; + int printmax = al->count <= 3 ? al->count : 3; + for (i = 0; i < printmax; i++) + { + logputs (LOG_VERBOSE, print_address (al->addresses + i)); + if (i < printmax - 1) + logputs (LOG_VERBOSE, ", "); + } + if (printmax != al->count) + logputs (LOG_VERBOSE, ", ..."); + logputs (LOG_VERBOSE, "\n"); + } + + /* Cache the lookup information. */ + if (use_cache) + cache_store (host, al); + + return al; +} + /* Determine whether a URL is acceptable to be followed, according to a list of domains to accept. */ -int -accept_domain (struct urlinfo *u) +bool +accept_domain (struct url *u) { assert (u->host != NULL); if (opt.domains) { if (!sufmatch ((const char **)opt.domains, u->host)) - return 0; + return false; } if (opt.exclude_domains) { if (sufmatch ((const char **)opt.exclude_domains, u->host)) - return 0; + return false; } - return 1; + return true; } /* Check whether WHAT is matched in LIST, each element of LIST being a @@ -345,7 +838,7 @@ accept_domain (struct urlinfo *u) match_backwards() in utils.c). If an element of LIST matched, 1 is returned, 0 otherwise. */ -int +bool sufmatch (const char **list, const char *what) { int i, j, k, lw; @@ -358,165 +851,28 @@ sufmatch (const char **list, const char *what) break; /* The domain must be first to reach to beginning. */ if (j == -1) - return 1; + return true; } - return 0; + return false; } -/* Return email address of the form username@FQDN suitable for - anonymous FTP passwords. This process is error-prone, and the - escape hatch is the MY_HOST preprocessor constant, which can be - used to hard-code either your hostname or FQDN at compile-time. - - If the FQDN cannot be determined, a warning is printed, and the - function returns a short `username@' form, accepted by most - anonymous servers. - - If not even the username cannot be divined, it means things are - seriously fucked up, and Wget exits. */ -char * -ftp_getaddress (void) +void +host_cleanup (void) { - static char *address; - - /* Do the drill only the first time, as it won't change. */ - if (!address) + if (host_name_addresses_map) { - char userid[32]; /* 9 should be enough for Unix, but - I'd rather be on the safe side. */ - char *host, *fqdn; - - if (!pwd_cuserid (userid)) - { - logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"), - exec_name); - exit (1); - } -#ifdef MY_HOST - STRDUP_ALLOCA (host, MY_HOST); -#else /* not MY_HOST */ -#ifdef HAVE_UNAME - { - struct utsname ubuf; - if (uname (&ubuf) < 0) - { - logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"), - exec_name, strerror (errno)); - fqdn = ""; - goto giveup; - } - STRDUP_ALLOCA (host, ubuf.nodename); - } -#else /* not HAVE_UNAME */ -#ifdef HAVE_GETHOSTNAME - host = alloca (256); - if (gethostname (host, 256) < 0) + hash_table_iterator iter; + for (hash_table_iterate (host_name_addresses_map, &iter); + hash_table_iter_next (&iter); + ) { - logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"), - exec_name); - fqdn = ""; - goto giveup; + char *host = iter.key; + struct address_list *al = iter.value; + xfree (host); + assert (al->refcount == 1); + address_list_delete (al); } -#else /* not HAVE_GETHOSTNAME */ - #error Cannot determine host name. -#endif /* not HAVE_GETHOSTNAME */ -#endif /* not HAVE_UNAME */ -#endif /* not MY_HOST */ - /* If the address we got so far contains a period, don't bother - anymore. */ - if (strchr (host, '.')) - fqdn = host; - else - { - /* #### I've seen the following scheme fail on at least one - system! Do we care? */ - char *tmpstore; - /* According to Richard Stevens, the correct way to find the - FQDN is to (1) find the host name, (2) find its IP - address using gethostbyname(), and (3) get the FQDN using - gethostbyaddr(). So that's what we'll do. Step one has - been done above. */ - /* (2) */ - struct hostent *hp = gethostbyname (host); - if (!hp || !hp->h_addr_list) - { - logprintf (LOG_ALWAYS, _("\ -%s: Warning: cannot determine local IP address.\n"), - exec_name); - fqdn = ""; - goto giveup; - } - /* Copy the argument, so the call to gethostbyaddr doesn't - clobber it -- just in case. */ - tmpstore = (char *)alloca (hp->h_length); - memcpy (tmpstore, *hp->h_addr_list, hp->h_length); - /* (3) */ - hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype); - if (!hp || !hp->h_name) - { - logprintf (LOG_ALWAYS, _("\ -%s: Warning: cannot reverse-lookup local IP address.\n"), - exec_name); - fqdn = ""; - goto giveup; - } - if (!strchr (hp->h_name, '.')) - { -#if 0 - /* This gets ticked pretty often. Karl Berry reports - that there can be valid reasons for the local host - name not to be an FQDN, so I've decided to remove the - annoying warning. */ - logprintf (LOG_ALWAYS, _("\ -%s: Warning: reverse-lookup of local address did not yield FQDN!\n"), - exec_name); -#endif - fqdn = ""; - goto giveup; - } - /* Once we're here, hp->h_name contains the correct FQDN. */ - STRDUP_ALLOCA (fqdn, hp->h_name); - } - giveup: - address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1); - sprintf (address, "%s@%s", userid, fqdn); + hash_table_destroy (host_name_addresses_map); + host_name_addresses_map = NULL; } - return address; -} - -/* Print error messages for host errors. */ -char * -herrmsg (int error) -{ - /* Can't use switch since some constants are equal (at least on my - system), and the compiler signals "duplicate case value". */ - if (error == HOST_NOT_FOUND - || error == NO_RECOVERY - || error == NO_DATA - || error == NO_ADDRESS - || error == TRY_AGAIN) - return _("Host not found"); - else - return _("Unknown error"); -} - -void -clean_hosts (void) -{ - /* host_name_address_map and host_address_name_map share the - strings. Because of that, calling free_keys_and_values once - suffices for both. */ - free_keys_and_values (host_name_address_map); - hash_table_destroy (host_name_address_map); - hash_table_destroy (host_address_name_map); - free_keys_and_values (host_slave_master_map); - hash_table_destroy (host_slave_master_map); -} - -void -host_init (void) -{ - host_name_address_map = make_string_hash_table (0); - host_address_name_map = make_string_hash_table (0); - host_slave_master_map = make_string_hash_table (0); }