#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#include <assert.h>
-#include <sys/types.h>
-#ifdef WINDOWS
-# include <winsock.h>
-# define SET_H_ERRNO(err) WSASetLastError (err)
-#else
+#ifndef WINDOWS
# include <sys/socket.h>
# include <netinet/in.h>
# ifndef __BEOS__
# endif
# include <netdb.h>
# define SET_H_ERRNO(err) ((void)(h_errno = (err)))
+#else /* WINDOWS */
+# define SET_H_ERRNO(err) WSASetLastError (err)
#endif /* WINDOWS */
-#ifndef NO_ADDRESS
-#define NO_ADDRESS NO_DATA
-#endif
-
-#ifdef HAVE_SYS_UTSNAME_H
-# include <sys/utsname.h>
-#endif
#include <errno.h>
#include "wget.h"
#include "host.h"
#include "url.h"
#include "hash.h"
+#include "connect.h" /* for socket_has_inet6 */
-#ifndef errno
-extern int errno;
-#endif
-
-#ifndef h_errno
-# ifndef __CYGWIN__
-extern int h_errno;
-# endif
-#endif
-
-#ifdef ENABLE_IPV6
-int ip_default_family = AF_UNSPEC;
-#else
-int ip_default_family = AF_INET;
+#ifndef NO_ADDRESS
+# define NO_ADDRESS NO_DATA
#endif
-/* Mapping between known hosts and to lists of their addresses. */
-
-static struct hash_table *host_name_addresses_map;
-\f
-/* Lists of addresses. This should eventually be extended to handle
- IPv6. */
+/* Lists of IP addresses that result from running DNS queries. See
+ lookup_host for details. */
struct address_list {
int count; /* number of adrresses */
ip_address *addresses; /* pointer to the string of addresses */
int faulty; /* number of addresses known not to work. */
- int from_cache; /* whether this entry was pulled from
- cache or freshly looked up. */
+ int connected; /* whether we were able to connect to
+ one of the addresses in the list,
+ at least once. */
int refcount; /* reference count; when it drops to
0, the entry is freed. */
*end = al->count;
}
-/* Return whether this address list entry has been obtained from the
- cache. */
-
-int
-address_list_cached_p (const struct address_list *al)
-{
- return al->from_cache;
-}
-
/* Return a pointer to the address at position POS. */
const ip_address *
return al->addresses + pos;
}
-/* Check whether two address lists have all their IPs in common. */
+/* Return non-zero if AL contains IP, zero otherwise.
+
+ Only entries whose type matches IP's type are considered: IPv4
+ entries are compared by s_addr; IPv6 entries (when ENABLE_IPV6 is
+ defined) are compared with IN6_ARE_ADDR_EQUAL and, where
+ HAVE_SOCKADDR_IN6_SCOPE_ID is defined, by scope as well. An IP of
+ any other type makes the function call abort. */
int
-address_list_match_all (const struct address_list *al1,
- const struct address_list *al2)
+address_list_contains (const struct address_list *al, const ip_address *ip)
{
-#ifdef ENABLE_IPV6
int i;
-#endif
- if (al1 == al2)
- return 1;
- if (al1->count != al2->count)
- return 0;
-
- /* For the comparison to be complete, we'd need to sort the IP
- addresses first. But that's not necessary because this is only
- used as an optimization. */
-
-#ifndef ENABLE_IPV6
- /* In the non-IPv6 case, there is only one address type, so we can
- compare the whole array with memcmp. */
- return 0 == memcmp (al1->addresses, al2->addresses,
- al1->count * sizeof (ip_address));
-#else /* ENABLE_IPV6 */
- for (i = 0; i < al1->count; ++i)
+ switch (ip->type)
{
- const ip_address *ip1 = &al1->addresses[i];
- const ip_address *ip2 = &al2->addresses[i];
-
- if (ip1->type != ip2->type)
- return 0;
-
- switch (ip1->type)
+ case IPV4_ADDRESS:
+ for (i = 0; i < al->count; i++)
{
- case IPV4_ADDRESS:
- if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr
- != ADDRESS_IPV4_IN_ADDR (ip2).s_addr)
- return 0;
- break;
- case IPV6_ADDRESS:
+ ip_address *cur = al->addresses + i;
+ if (cur->type == IPV4_ADDRESS
+ && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
+ ==
+ ADDRESS_IPV4_IN_ADDR (ip).s_addr))
+ return 1;
+ }
+ return 0;
+#ifdef ENABLE_IPV6
+ case IPV6_ADDRESS:
+ for (i = 0; i < al->count; i++)
+ {
+ ip_address *cur = al->addresses + i;
+ if (cur->type == IPV6_ADDRESS
#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
- if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2))
- return 0;
-#endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
- if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1),
- &ADDRESS_IPV6_IN6_ADDR (ip2)))
- return 0;
- break;
- default:
- abort ();
+ && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
+#endif
+ && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
+ &ADDRESS_IPV6_IN6_ADDR (ip)))
+ return 1;
}
- }
- return 1;
+ return 0;
#endif /* ENABLE_IPV6 */
+ default:
+ abort ();
+ }
}
/* Mark the INDEXth element of AL as faulty, so that the next time
al->faulty = 0;
}
+/* Set the "connected" flag of AL to true. This flag is used by
+ connect.c to see if the host perhaps needs to be resolved again. */
+
+void
+address_list_set_connected (struct address_list *al)
+{
+ al->connected = 1;
+}
+
+/* Return the value of AL's "connected" flag, which
+ address_list_set_connected sets to 1. */
+
+int
+address_list_connected_p (const struct address_list *al)
+{
+ return al->connected;
+}
+
#ifdef ENABLE_IPV6
-/**
- * address_list_from_addrinfo
- *
- * This function transform an addrinfo links list in and address_list.
- *
- * Input:
- * addrinfo* Linked list of addrinfo
- *
- * Output:
- * address_list* New allocated address_list
- */
+
+/* Create an address_list from the addresses in the given struct
+ addrinfo. */
+
static struct address_list *
address_list_from_addrinfo (const struct addrinfo *ai)
{
return NULL;
al = xnew0 (struct address_list);
- al->addresses = xnew_array (ip_address, cnt);
- al->count = cnt;
- al->refcount = 1;
+ al->addresses = xnew_array (ip_address, cnt);
+ al->count = cnt;
+ al->refcount = 1;
ip = al->addresses;
for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
assert (ip - al->addresses == cnt);
return al;
}
-#else
+
+/* Non-zero if ADDR, a generic pointer to an ip_address, is of type
+   IPV4_ADDRESS.  The argument is parenthesized so that any pointer
+   expression can be passed safely.  */
+#define IS_IPV4(addr) (((const ip_address *) (addr))->type == IPV4_ADDRESS)
+
+/* Compare two IP addresses by type, giving preference to the IPv4
+   address (sorting it first).  In other words, return -1 if ADDR1 is
+   IPv4 and ADDR2 is not, +1 if ADDR1 is not IPv4 and ADDR2 is, and
+   0 otherwise.
+
+   This is intended to be used as the comparator arg to a qsort-like
+   sorting function, which is why it accepts generic pointers.  */
+
+static int
+cmp_prefer_ipv4 (const void *addr1, const void *addr2)
+{
+  /* Each negation yields 0 for an IPv4 address and 1 for anything
+     else, so the difference is -1, 0 or +1.  */
+  return !IS_IPV4 (addr1) - !IS_IPV4 (addr2);
+}
+
+/* Non-zero if ADDR, a generic pointer to an ip_address, is of type
+   IPV6_ADDRESS.  The argument is parenthesized so that any pointer
+   expression can be passed safely.  */
+#define IS_IPV6(addr) (((const ip_address *) (addr))->type == IPV6_ADDRESS)
+
+/* Like cmp_prefer_ipv4, but give preference to the IPv6 address:
+   return -1 if only ADDR1 is IPv6, +1 if only ADDR2 is IPv6, and 0
+   otherwise.  */
+
+static int
+cmp_prefer_ipv6 (const void *addr1, const void *addr2)
+{
+  /* Each negation yields 0 for an IPv6 address and 1 for anything
+     else, so the difference is -1, 0 or +1.  */
+  return !IS_IPV6 (addr1) - !IS_IPV6 (addr2);
+}
+
+#else /* not ENABLE_IPV6 */
+
/* Create an address_list from a NULL-terminated vector of IPv4
addresses. This kind of vector is returned by gethostbyname. */
static struct address_list *
-address_list_from_ipv4_addresses (char **h_addr_list)
+address_list_from_ipv4_addresses (char **vec)
{
int count, i;
struct address_list *al = xnew0 (struct address_list);
+ /* Count the entries that precede the terminating NULL. */
count = 0;
- while (h_addr_list[count])
+ while (vec[count])
++count;
assert (count > 0);
- al->addresses = xnew_array (ip_address, count);
- al->count = count;
- al->refcount = 1;
+ al->addresses = xnew_array (ip_address, count);
+ al->count = count;
+ al->refcount = 1;
for (i = 0; i < count; i++)
{
ip_address *ip = &al->addresses[i];
ip->type = IPV4_ADDRESS;
- memcpy (ADDRESS_IPV4_DATA (ip), h_addr_list[i], 4);
+ /* Each vector entry supplies the 4 bytes of one IPv4 address. */
+ memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
}
return al;
}
-#endif
+
+#endif /* not ENABLE_IPV6 */
static void
address_list_delete (struct address_list *al)
xfree (al);
}
+/* Mark the address list as being no longer in use. This reduces
+ its reference count; once the count is zero or less, the list is
+ handed to address_list_delete. Both steps are reported through
+ DEBUGP. */
+
void
address_list_release (struct address_list *al)
{
--al->refcount;
- DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
+ DEBUGP (("Releasing 0x%0*lx (new refcount %d).\n", PTR_FORMAT (al),
+ al->refcount));
if (al->refcount <= 0)
{
- DEBUGP (("Deleting unused %p.\n", al));
+ DEBUGP (("Deleting unused 0x%0*lx.\n", PTR_FORMAT (al)));
address_list_delete (al);
}
}
return ctx.hptr;
}
+/* Map a resolver error code ERROR to a message describing it.  The
+   result is one of three fixed strings, each passed through _().  */
+static char *
+host_errstr (int error)
+{
+  /* Plain comparisons are used instead of a switch because some of
+     these constants can be equal, and duplicate case values make the
+     compiler complain.  */
+  const int unknown_host = (error == HOST_NOT_FOUND
+                            || error == NO_RECOVERY
+                            || error == NO_DATA
+                            || error == NO_ADDRESS);
+
+  if (unknown_host)
+    return _("Unknown host");
+
+  /* Message modeled after what gai_strerror returns in similar
+     circumstances.  */
+  if (error == TRY_AGAIN)
+    return _("Temporary failure in name resolution");
+
+  return _("Unknown error");
+}
+
+
#else /* ENABLE_IPV6 */
struct gaiwt_context {
#endif
}
abort ();
+}
+
+/* The following two functions were adapted from glibc. */
+
+/* Return 1 if the characters in [STR, END) form a dotted-decimal IPv4
+   address: exactly four runs of decimal digits, each with a value of
+   at most 255, separated by single dots.  Return 0 otherwise.  */
+static int
+is_valid_ipv4_address (const char *str, const char *end)
+{
+  int in_octet = 0;             /* non-zero while inside a digit run */
+  int octet_count = 0;          /* digit runs started so far */
+  int octet_value = 0;          /* value of the current digit run */
+  const char *p;
+
+  for (p = str; p < end; p++)
+    {
+      int c = *p;
+
+      if (c >= '0' && c <= '9')
+        {
+          octet_value = octet_value * 10 + (c - '0');
+          if (octet_value > 255)
+            return 0;
+
+          /* The first digit of a run opens a new octet.  */
+          if (!in_octet)
+            {
+              if (++octet_count > 4)
+                return 0;
+              in_octet = 1;
+            }
+        }
+      else if (c == '.' && in_octet)
+        {
+          /* A dot is only allowed after digits, and never after the
+             fourth octet.  */
+          if (octet_count == 4)
+            return 0;
+          octet_value = 0;
+          in_octet = 0;
+        }
+      else
+        return 0;
+    }
+
+  return octet_count >= 4;
+}
+
+/* Return 1 if the characters in [STR, END) are accepted as an IPv6
+ address, 0 otherwise. Groups of hex digits (each at most 0xffff)
+ are separated by ':'; at most one empty group ("::") may appear;
+ and the address may end in a dotted-decimal part, which is checked
+ with is_valid_ipv4_address. */
+
+int
+is_valid_ipv6_address (const char *str, const char *end)
+{
+ /* Use lower-case for these to avoid clash with system headers. */
+ enum {
+ ns_inaddrsz = 4,
+ ns_in6addrsz = 16,
+ ns_int16sz = 2
+ };
+
+ const char *curtok; /* start of the group currently being read */
+ int tp; /* size accounted for so far: ns_int16sz per hex
+ group, ns_inaddrsz for a dotted-decimal tail */
+ const char *colonp; /* non-NULL once an empty group ("::") was seen;
+ only ever compared against NULL here */
+ int saw_xdigit; /* 1 while the current group has hex digits */
+ unsigned int val; /* value of the current hex group */
+
+ tp = 0;
+ colonp = NULL;
+
+ if (str == end)
+ return 0;
+
+ /* Leading :: requires some special handling. */
+ if (*str == ':')
+ {
+ /* Skip the first colon; a lone leading ':' is rejected. The
+ second colon is then seen by the loop as an empty group. */
+ ++str;
+ if (str == end || *str != ':')
+ return 0;
+ }
+
+ curtok = str;
+ saw_xdigit = 0;
+ val = 0;
+
+ while (str < end)
+ {
+ int ch = *str++;
+
+ /* If ch is a hex digit, shift it into val; a group whose value
+ exceeds 0xffff is rejected. */
+ if (ISXDIGIT (ch))
+ {
+ val <<= 4;
+ val |= XDIGIT_TO_NUM (ch);
+ if (val > 0xffff)
+ return 0;
+ saw_xdigit = 1;
+ continue;
+ }
+
+ /* if ch is a colon ... */
+ if (ch == ':')
+ {
+ curtok = str;
+ if (saw_xdigit == 0)
+ {
+ /* A colon with no digits before it marks the empty
+ group; a second one is an error. */
+ if (colonp != NULL)
+ return 0;
+ colonp = str + tp;
+ continue;
+ }
+ else if (str == end)
+ /* A single colon may not end the address. */
+ return 0;
+ /* Close the hex group, unless there is no room left for it. */
+ if (tp > ns_in6addrsz - ns_int16sz)
+ return 0;
+ tp += ns_int16sz;
+ saw_xdigit = 0;
+ val = 0;
+ continue;
+ }
+
+ /* If ch is a dot, the text from the start of the current group
+ to END must be a valid IPv4 address that still fits; it ends
+ the scan. */
+ if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz)
+ && is_valid_ipv4_address (curtok, end) == 1)
+ {
+ tp += ns_inaddrsz;
+ saw_xdigit = 0;
+ break;
+ }
+
+ return 0;
+ }
+
+ /* Account for a final hex group that was not closed by a colon. */
+ if (saw_xdigit == 1)
+ {
+ if (tp > ns_in6addrsz - ns_int16sz)
+ return 0;
+ tp += ns_int16sz;
+ }
+
+ /* An empty group must stand in for at least one missing group; if
+ it does, the address is treated as complete. */
+ if (colonp != NULL)
+ {
+ if (tp == ns_in6addrsz)
+ return 0;
+ tp = ns_in6addrsz;
+ }
+
+ if (tp != ns_in6addrsz)
+ return 0;
+
+ return 1;
+}
+\f
+/* Simple host cache, used by lookup_host to speed up resolving. The
+ cache doesn't handle TTL because Wget is a fairly short-lived
+ application. Refreshing is attempted when connect fails, though --
+ see connect_to_host. */
+
+/* Mapping between known host names and lists of their addresses. */
+static struct hash_table *host_name_addresses_map;
+
+
+/* Return the host's resolved addresses from the cache, if
+   available.  Returns NULL when the cache has not been created or
+   HOST has no entry in it.  On a hit the list's reference count is
+   incremented before the list is returned.  */
+
+static struct address_list *
+cache_query (const char *host)
+{
+  struct address_list *al;
+  if (!host_name_addresses_map)
+    return NULL;
+  al = hash_table_get (host_name_addresses_map, host);
+  if (al)
+    {
+      /* Print the pointer the same way address_list_release does,
+         rather than passing a non-void pointer to %p.  */
+      DEBUGP (("Found %s in host_name_addresses_map (0x%0*lx)\n", host,
+               PTR_FORMAT (al)));
+      ++al->refcount;
+      return al;
+    }
return NULL;
}
-/* Add host name HOST with the address ADDR_TEXT to the cache.
- ADDR_LIST is a NULL-terminated list of addresses, as in struct
- hostent. */
+/* Cache the DNS lookup of HOST. Subsequent invocations of
+ lookup_host will return the cached value. */
static void
-cache_host_lookup (const char *host, struct address_list *al)
+cache_store (const char *host, struct address_list *al)
{
if (!host_name_addresses_map)
host_name_addresses_map = make_nocase_string_hash_table (0);
#endif
}
-void
-forget_host_lookup (const char *host)
+/* Remove HOST from the DNS cache. Does nothing if HOST is not in
+ the cache, or if the cache has not been created yet. The cached
+ list is passed to address_list_release before its entry is removed
+ from the hash table. */
+
+static void
+cache_remove (const char *host)
{
- struct address_list *al = hash_table_get (host_name_addresses_map, host);
+ struct address_list *al;
+ if (!host_name_addresses_map)
+ return;
+ al = hash_table_get (host_name_addresses_map, host);
if (al)
{
address_list_release (al);
hash_table_remove (host_name_addresses_map, host);
}
}
+\f
+/* Look up HOST in DNS and return a list of IP addresses.
+
+ This function caches its result so that, if the same host is passed
+ the second time, the addresses are returned without DNS lookup.
+ (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to
+ globally disable caching.)
+
+ The order of the returned addresses is affected by the setting of
+ opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are
+ placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed
+ at the beginning; otherwise, the order is left intact. The
+ relative order of addresses with the same family is left
+ undisturbed in either case.
+
+ FLAGS can be a combination of:
+ LH_SILENT - don't print the "resolving ... done" messages.
+ LH_BIND - resolve addresses for use with bind, which under
+ IPv6 means to use AI_PASSIVE flag to getaddrinfo.
+ Passive lookups are not cached under IPv6.
+ LH_REFRESH - if HOST is cached, remove the entry from the cache
+ and resolve it anew. */
struct address_list *
lookup_host (const char *host, int flags)
{
- struct address_list *al = NULL;
-
-#ifdef ENABLE_IPV6
- int err, family;
- struct addrinfo hints, *res;
-
- /* Is this necessary? Should this function be changed to accept a
- FAMILY argument? */
- if (flags & LH_IPV4_ONLY)
- family = AF_INET;
- else if (flags & LH_IPV6_ONLY)
- family = AF_INET6;
- else
- family = ip_default_family;
-#endif
-
- /* First, try to check whether the address is already a numeric
- address, in which case we don't need to cache it or bother with
- setting up timeouts. Plus, if memory serves me right, Ultrix's
- gethostbyname can't handle numeric addresses (!).
-
- Where getaddrinfo is available, we do it using the AI_NUMERICHOST
- flag. Without IPv6, we use inet_addr succeeds. */
+ struct address_list *al;
+ int silent = flags & LH_SILENT;
+ int use_cache;
+ int numeric_address = 0;
+ double timeout = opt.dns_timeout;
-#ifdef ENABLE_IPV6
- xzero (hints);
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_NUMERICHOST;
- if (flags & LH_PASSIVE)
- hints.ai_flags = AI_PASSIVE;
-
- /* no need to call getaddrinfo_with_timeout here, as we're not
- * relying on the DNS, but we're only doing an address translation
- * from presentation (ASCII) to network format */
- err = getaddrinfo (host, NULL, &hints, &res);
- if (err == 0 && res != NULL)
- {
- al = address_list_from_addrinfo (res);
- freeaddrinfo (res);
- return al;
- }
-#else
+#ifndef ENABLE_IPV6
+ /* If we're not using getaddrinfo, first check if HOST specifies a
+ numeric IPv4 address. Some implementations of gethostbyname
+ (e.g. the Ultrix one and possibly Winsock) don't accept
+ dotted-decimal IPv4 addresses. */
{
uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
if (addr_ipv4 != (uint32_t) -1)
{
- /* The return value of inet_addr is in network byte order, so
- we can just copy it to IP. */
- char **vec[2];
+ /* No need to cache host->addr relation, just return the
+ address. */
+ char *vec[2];
vec[0] = (char *)&addr_ipv4;
vec[1] = NULL;
return address_list_from_ipv4_addresses (vec);
}
}
+#else /* ENABLE_IPV6 */
+ /* If we're using getaddrinfo, at least check whether the address is
+ already numeric, in which case there is no need to print the
+ "Resolving..." output. (This comes at no additional cost since
+ the is_valid_ipv*_address are already required for
+ url_parse.) */
+ {
+ const char *end = host + strlen (host);
+ if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end))
+ numeric_address = 1;
+ }
#endif
- /* Then, try to find the host in the cache. */
+ /* Cache is normally on, but can be turned off with --no-dns-cache.
+ Don't cache passive lookups under IPv6. */
+ use_cache = opt.dns_cache;
+#ifdef ENABLE_IPV6
+ if ((flags & LH_BIND) || numeric_address)
+ use_cache = 0;
+#endif
- if (host_name_addresses_map)
+ /* Try to find the host in the cache so we don't need to talk to the
+ resolver. If LH_REFRESH is requested, remove HOST from the cache
+ instead. */
+ if (use_cache)
{
- al = hash_table_get (host_name_addresses_map, host);
- if (al)
+ if (!(flags & LH_REFRESH))
{
- DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
- ++al->refcount;
- al->from_cache = 1;
- return al;
+ al = cache_query (host);
+ if (al)
+ return al;
}
+ else
+ cache_remove (host);
}
- if (!(flags & LH_SILENT))
- logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
+ /* No luck with the cache; resolve HOST. */
- /* Host name lookup goes on below. */
+ if (!silent && !numeric_address)
+ logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
#ifdef ENABLE_IPV6
{
+ int err;
+ struct addrinfo hints, *res;
+
xzero (hints);
- hints.ai_family = family;
hints.ai_socktype = SOCK_STREAM;
- if (flags & LH_PASSIVE)
- hints.ai_flags = AI_PASSIVE;
-
- err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
+ if (opt.ipv4_only)
+ hints.ai_family = AF_INET;
+ else if (opt.ipv6_only)
+ hints.ai_family = AF_INET6;
+ else
+ /* We tried using AI_ADDRCONFIG, but removed it because: it
+ misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and
+ it's unneeded since we sort the addresses anyway. */
+ hints.ai_family = AF_UNSPEC;
+
+ if (flags & LH_BIND)
+ hints.ai_flags |= AI_PASSIVE;
+
+#ifdef AI_NUMERICHOST
+ if (numeric_address)
+ {
+ /* Where available, the AI_NUMERICHOST hint can prevent costly
+ access to DNS servers. */
+ hints.ai_flags |= AI_NUMERICHOST;
+ timeout = 0; /* no timeout needed when "resolving"
+ numeric hosts -- avoid setting up
+ signal handlers and such. */
+ }
+#endif
+ err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout);
if (err != 0 || res == NULL)
{
- if (!(flags & LH_SILENT))
+ if (!silent)
logprintf (LOG_VERBOSE, _("failed: %s.\n"),
err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
- return NULL;
+ return NULL;
}
al = address_list_from_addrinfo (res);
freeaddrinfo (res);
+ if (!al)
+ {
+ logprintf (LOG_VERBOSE,
+ _("failed: No IPv4/IPv6 addresses for host.\n"));
+ return NULL;
+ }
+
+ /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per
+ --prefer-family) come first. Sorting is stable so the order of
+ the addresses with the same family is undisturbed. */
+ if (al->count > 1 && opt.prefer_family != prefer_none)
+ stable_sort (al->addresses, al->count, sizeof (ip_address),
+ opt.prefer_family == prefer_ipv4
+ ? cmp_prefer_ipv4 : cmp_prefer_ipv6);
}
-#else
+#else /* not ENABLE_IPV6 */
{
- struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
+ struct hostent *hptr = gethostbyname_with_timeout (host, timeout);
if (!hptr)
{
- if (!(flags & LH_SILENT))
+ if (!silent)
{
if (errno != ETIMEDOUT)
- logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
+ logprintf (LOG_VERBOSE, _("failed: %s.\n"),
+ host_errstr (h_errno));
else
logputs (LOG_VERBOSE, _("failed: timed out.\n"));
}
return NULL;
}
- assert (hptr->h_length == 4);
/* Do older systems have h_addr_list? */
al = address_list_from_ipv4_addresses (hptr->h_addr_list);
}
-#endif
+#endif /* not ENABLE_IPV6 */
/* Print the addresses determined by DNS lookup, but no more than
three. */
- if (!(flags & LH_SILENT))
+ if (!silent && !numeric_address)
{
int i;
int printmax = al->count <= 3 ? al->count : 3;
}
/* Cache the lookup information. */
- if (opt.dns_cache)
- cache_host_lookup (host, al);
+ if (use_cache)
+ cache_store (host, al);
return al;
}
return 0;
}
-/* Print error messages for host errors. */
-char *
-herrmsg (int error)
-{
- /* Can't use switch since some constants are equal (at least on my
- system), and the compiler signals "duplicate case value". */
- if (error == HOST_NOT_FOUND
- || error == NO_RECOVERY
- || error == NO_DATA
- || error == NO_ADDRESS
- || error == TRY_AGAIN)
- return _("Host not found");
- else
- return _("Unknown error");
-}
-
static int
host_cleanup_mapper (void *key, void *value, void *arg_ignored)
{