#include "host.h"
#include "url.h"
#include "hash.h"
+#include "connect.h" /* for socket_has_inet6 */
#ifndef errno
extern int errno;
# endif
#endif
-/* Mapping between known hosts and to lists of their addresses. */
-
-static struct hash_table *host_name_addresses_map;
-\f
/* Lists of IP addresses that result from running DNS queries. See
lookup_host for details. */
return al->addresses + pos;
}
-/* Return 1 if IP is one of the addresses in AL. */
+/* Return non-zero if AL contains IP, zero otherwise. */
int
-address_list_find (const struct address_list *al, const ip_address *ip)
+address_list_contains (const struct address_list *al, const ip_address *ip)
{
int i;
switch (ip->type)
#endif /* ENABLE_IPV6 */
default:
abort ();
- return 1;
+ return 0;
}
}
return NULL;
al = xnew0 (struct address_list);
- al->addresses = xnew_array (ip_address, cnt);
- al->count = cnt;
- al->refcount = 1;
+ al->addresses = xnew_array (ip_address, cnt);
+ al->count = cnt;
+ al->refcount = 1;
ip = al->addresses;
for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
++count;
assert (count > 0);
- al->addresses = xnew_array (ip_address, count);
- al->count = count;
- al->refcount = 1;
+ al->addresses = xnew_array (ip_address, count);
+ al->count = count;
+ al->refcount = 1;
for (i = 0; i < count; i++)
{
return NULL;
}
-/* Add host name HOST with the address ADDR_TEXT to the cache.
- ADDR_LIST is a NULL-terminated list of addresses, as in struct
- hostent. */
+/* The following two functions were adapted from glibc. */
+
+/* Return 1 if the characters in [STR, END) form a dotted-decimal IPv4
+   address: exactly four groups of decimal digits, each with a value of
+   at most 255, separated by single dots.  Return 0 otherwise.  */
+static int
+is_valid_ipv4_address (const char *str, const char *end)
+{
+  const char *p;
+  int in_octet = 0;             /* non-zero while inside a digit run */
+  int n_octets = 0;             /* digit runs started so far */
+  int octet_val = 0;            /* value of the current digit run */
+
+  for (p = str; p < end; p++)
+    {
+      int c = *p;
+
+      if (c == '.')
+        {
+          /* A dot must close a non-empty octet and may not follow
+             the fourth one.  */
+          if (!in_octet || n_octets == 4)
+            return 0;
+          octet_val = 0;
+          in_octet = 0;
+          continue;
+        }
+
+      /* Anything other than a dot has to be a decimal digit.  */
+      if (c < '0' || c > '9')
+        return 0;
+
+      octet_val = octet_val * 10 + (c - '0');
+      if (octet_val > 255)
+        return 0;
+
+      if (!in_octet)
+        {
+          /* First digit of a new octet.  */
+          n_octets++;
+          if (n_octets > 4)
+            return 0;
+          in_octet = 1;
+        }
+    }
+
+  return n_octets >= 4;
+}
+
+/* Return 1 if the characters in [STR, END) form a textual IPv6
+   address, 0 otherwise.  The address consists of colon-separated
+   groups of hex digits (each at most 0xffff), may contain one "::"
+   standing for a run of omitted groups, and may end in an embedded
+   dotted-decimal IPv4 address.  Only the syntax is checked; no
+   binary address is produced.  */
+int
+is_valid_ipv6_address (const char *str, const char *end)
+{
+  /* Sizes in bytes of an IPv4 address, an IPv6 address and one 16-bit
+     group.  Spelled in lower case because <arpa/nameser.h> defines
+     NS_INADDRSZ, NS_IN6ADDRSZ and NS_INT16SZ as macros, which would
+     turn same-named enumerators into a syntax error.  */
+  enum {
+    ns_inaddrsz  = 4,
+    ns_in6addrsz = 16,
+    ns_int16sz   = 2
+  };
+
+  const char *curtok;           /* start of the group being scanned */
+  int tp;                       /* address bytes accounted for so far */
+  int saw_double_colon;         /* non-zero once "::" has been seen */
+  int saw_xdigit;               /* non-zero while inside a hex group */
+  unsigned int val;             /* value of the current hex group */
+
+  tp = 0;
+  saw_double_colon = 0;
+
+  if (str == end)
+    return 0;
+
+  /* Leading :: requires some special handling: a single leading colon
+     is invalid, so the next character must be a colon as well.  */
+  if (*str == ':')
+    {
+      ++str;
+      if (str == end || *str != ':')
+        return 0;
+    }
+
+  curtok = str;
+  saw_xdigit = 0;
+  val = 0;
+
+  while (str < end)
+    {
+      int ch = *str++;
+
+      /* A hex digit extends the current 16-bit group.  */
+      if (ISXDIGIT (ch))
+        {
+          val <<= 4;
+          val |= XDIGIT_TO_NUM (ch);
+          if (val > 0xffff)
+            return 0;
+          saw_xdigit = 1;
+          continue;
+        }
+
+      /* A colon either terminates a group or is part of "::".  */
+      if (ch == ':')
+        {
+          curtok = str;
+          if (saw_xdigit == 0)
+            {
+              /* Empty group, i.e. "::" -- allowed only once.  A plain
+                 flag is used here instead of a marker pointer so that
+                 no pointer past the end of the string is ever
+                 formed.  */
+              if (saw_double_colon)
+                return 0;
+              saw_double_colon = 1;
+              continue;
+            }
+          else if (str == end)
+            /* A group followed by a single trailing colon.  */
+            return 0;
+          if (tp > ns_in6addrsz - ns_int16sz)
+            return 0;
+          tp += ns_int16sz;
+          saw_xdigit = 0;
+          val = 0;
+          continue;
+        }
+
+      /* A dot means the current group is really the start of an
+         embedded IPv4 address, which must run to END and fit in the
+         remaining bytes.  */
+      if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz)
+          && is_valid_ipv4_address (curtok, end) == 1)
+        {
+          tp += ns_inaddrsz;
+          saw_xdigit = 0;
+          break;
+        }
+
+      return 0;
+    }
+
+  /* Account for a final hex group that was not followed by a colon.  */
+  if (saw_xdigit == 1)
+    {
+      if (tp > ns_in6addrsz - ns_int16sz)
+        return 0;
+      tp += ns_int16sz;
+    }
+
+  /* "::" must stand for at least one omitted group; when it does, it
+     fills the address up to its full size.  */
+  if (saw_double_colon)
+    {
+      if (tp == ns_in6addrsz)
+        return 0;
+      tp = ns_in6addrsz;
+    }
+
+  if (tp != ns_in6addrsz)
+    return 0;
+
+  return 1;
+}
+\f
+/* Simple host cache, used by lookup_host to speed up resolving. The
+ cache doesn't handle TTL because Wget is a fairly short-lived
+ application. Refreshing is attempted when connect fails, though --
+ see connect_to_host. */
+
+/* Mapping between known hosts and lists of their addresses. */
+static struct hash_table *host_name_addresses_map;
+
+
+/* Return the host's resolved addresses from the cache, if
+   available.
+
+   Returns NULL when no cache table exists or when HOST has no entry
+   in it.  Otherwise the cached address list is returned with its
+   reference count incremented on behalf of the caller.  */
+
+static struct address_list *
+cache_query (const char *host)
+{
+  struct address_list *al;
+
+  if (!host_name_addresses_map)
+    return NULL;
+
+  al = hash_table_get (host_name_addresses_map, host);
+  if (!al)
+    return NULL;
+
+  /* The %p conversion expects a `void *' argument, hence the cast.  */
+  DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, (void *) al));
+  ++al->refcount;
+  return al;
+}
+
+/* Cache the DNS lookup of HOST. Subsequent invocations of
+ lookup_host will return the cached value. */
static void
-cache_host_lookup (const char *host, struct address_list *al)
+cache_store (const char *host, struct address_list *al)
{
if (!host_name_addresses_map)
host_name_addresses_map = make_nocase_string_hash_table (0);
#endif
}
-/* Remove HOST from Wget's DNS cache. Does nothing is HOST is not in
+/* Remove HOST from the DNS cache. Does nothing if HOST is not in
the cache. */
-void
-forget_host_lookup (const char *host)
+static void
+cache_remove (const char *host)
{
- struct address_list *al = hash_table_get (host_name_addresses_map, host);
+ struct address_list *al;
+ if (!host_name_addresses_map) /* No cache table exists, so there is nothing to remove. */
+ return;
+ al = hash_table_get (host_name_addresses_map, host); /* Tested below; a null result means HOST is not cached. */
if (al)
{
address_list_release (al);
hash_table_remove (host_name_addresses_map, host);
}
}
-
+\f
/* Look up HOST in DNS and return a list of IP addresses. The
addresses in the list are in the same order in which
gethostbyname/getaddrinfo returned them.
This function caches its result so that, if the same host is passed
- the second time, the addresses are returned without DNS lookup. If
- you want to force lookup, call forget_host_lookup() prior to this
- function, or set opt.dns_cache to 0 to globally disable caching.
-
- If SILENT is non-zero, progress messages are not printed. */
+ the second time, the addresses are returned without DNS lookup.
+ (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to
+ globally disable caching.)
+
+ FLAGS can be a combination of:
+ LH_SILENT - don't print the "resolving ... done" messages.
+ LH_BIND - resolve addresses for use with bind, which under
+ IPv6 means to use AI_PASSIVE flag to getaddrinfo.
+ Passive lookups are not cached under IPv6.
+ LH_REFRESH - if HOST is cached, remove the entry from the cache
+ and resolve it anew. */
struct address_list *
-lookup_host (const char *host, int silent)
+lookup_host (const char *host, int flags)
{
- struct address_list *al = NULL;
+ struct address_list *al;
+ int silent = flags & LH_SILENT;
+ int use_cache;
+ int numeric_address = 0;
+ double timeout = opt.dns_timeout;
#ifndef ENABLE_IPV6
/* If we're not using getaddrinfo, first check if HOST specifies a
- numeric IPv4 address. gethostbyname is not required to accept
- dotted-decimal IPv4 addresses, and some implementations (e.g. the
- Ultrix one and possibly Winsock) indeed don't. */
+ numeric IPv4 address. Some implementations of gethostbyname
+ (e.g. the Ultrix one and possibly Winsock) don't accept
+ dotted-decimal IPv4 addresses. */
{
uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
if (addr_ipv4 != (uint32_t) -1)
return address_list_from_ipv4_addresses (vec);
}
}
+#else /* ENABLE_IPV6 */
+ /* If we're using getaddrinfo, at least check whether the address is
+ already numeric, in which case there is no need to print the
+ "Resolving..." output. (This comes at no additional cost since
+ the is_valid_ipv*_address functions are already required for
+ url_parse.) */
+ {
+ const char *end = host + strlen (host);
+ if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end))
+ numeric_address = 1;
+ }
#endif
- /* Try to find the host in the cache. */
+ /* Cache is normally on, but can be turned off with --no-dns-cache.
+ Don't cache passive lookups under IPv6. */
+ use_cache = opt.dns_cache;
+#ifdef ENABLE_IPV6
+ if ((flags & LH_BIND) || numeric_address)
+ use_cache = 0;
+#endif
- if (host_name_addresses_map)
+ /* Try to find the host in the cache so we don't need to talk to the
+ resolver. If LH_REFRESH is requested, remove HOST from the cache
+ instead. */
+ if (use_cache)
{
- al = hash_table_get (host_name_addresses_map, host);
- if (al)
+ if (!(flags & LH_REFRESH))
{
- DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
- ++al->refcount;
- return al;
+ al = cache_query (host);
+ if (al)
+ return al;
}
+ else
+ cache_remove (host);
}
- /* No luck with the cache; resolve the host name. */
+ /* No luck with the cache; resolve HOST. */
- if (!silent)
- logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
+ if (!silent && !numeric_address)
+ logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
#ifdef ENABLE_IPV6
{
xzero (hints);
hints.ai_socktype = SOCK_STREAM;
- hints.ai_family = AF_UNSPEC; /* #### should look at opt.ipv4_only
- and opt.ipv6_only */
- hints.ai_flags = 0;
+ if (opt.ipv4_only)
+ hints.ai_family = AF_INET;
+ else if (opt.ipv6_only)
+ hints.ai_family = AF_INET6;
+ else
+ {
+ hints.ai_family = AF_UNSPEC;
+#ifdef AI_ADDRCONFIG
+ hints.ai_flags |= AI_ADDRCONFIG;
+#else
+ /* On systems without AI_ADDRCONFIG, emulate it by manually
+ checking whether the system supports IPv6 sockets. */
+ if (!socket_has_inet6 ())
+ hints.ai_family = AF_INET;
+#endif
+ }
+ if (flags & LH_BIND)
+ hints.ai_flags |= AI_PASSIVE;
- err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
+#ifdef AI_NUMERICHOST
+ if (numeric_address)
+ {
+ /* Where available, the AI_NUMERICHOST hint can prevent costly
+ access to DNS servers. */
+ hints.ai_flags |= AI_NUMERICHOST;
+ timeout = 0; /* no timeout needed when "resolving"
+ numeric hosts -- avoid setting up
+ signal handlers and such. */
+ }
+#endif
+
+ err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout);
if (err != 0 || res == NULL)
{
if (!silent)
freeaddrinfo (res);
if (!al)
{
- logprintf (LOG_VERBOSE, _("failed: No IPv4/IPv6 addresses.\n"));
+ logprintf (LOG_VERBOSE,
+ _("failed: No IPv4/IPv6 addresses for host.\n"));
return NULL;
}
}
-#else
+#else /* not ENABLE_IPV6 */
{
- struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
+ struct hostent *hptr = gethostbyname_with_timeout (host, timeout);
if (!hptr)
{
if (!silent)
/* Do older systems have h_addr_list? */
al = address_list_from_ipv4_addresses (hptr->h_addr_list);
}
-#endif
+#endif /* not ENABLE_IPV6 */
/* Print the addresses determined by DNS lookup, but no more than
three. */
- if (!silent)
+ if (!silent && !numeric_address)
{
int i;
int printmax = al->count <= 3 ? al->count : 3;
}
/* Cache the lookup information. */
- if (opt.dns_cache)
- cache_host_lookup (host, al);
+ if (use_cache)
+ cache_store (host, al);
return al;
}
-
-/* Resolve HOST to get an address for use with bind(2). Do *not* use
- this for sockets to be used with connect(2).
-
- This is a function separate from lookup_host because the results it
- returns are different -- it uses the AI_PASSIVE flag to
- getaddrinfo. Because of this distinction, it doesn't store the
- results in the cache. It prints nothing and implements no timeouts
- because it should normally only be used with local addresses
- (typically "localhost" or numeric addresses of different local
- interfaces.)
-
- Without IPv6, this function just calls lookup_host. */
-
-struct address_list *
-lookup_host_passive (const char *host)
-{
-#ifdef ENABLE_IPV6
- struct address_list *al = NULL;
- int err;
- struct addrinfo hints, *res;
-
- xzero (hints);
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_family = AF_UNSPEC; /* #### should look at opt.ipv4_only
- and opt.ipv6_only */
- hints.ai_flags = AI_PASSIVE;
-
- err = getaddrinfo (host, NULL, &hints, &res);
- if (err != 0 || res == NULL)
- return NULL;
- al = address_list_from_addrinfo (res);
- freeaddrinfo (res);
- return al;
-#else
- return lookup_host (host, 1);
-#endif
-}
\f
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */