-/* Dealing with host names.
- Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+/* Host name resolution and matching.
+ Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ (at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
+#ifndef WINDOWS
+#include <netdb.h>
+#endif
+
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
#ifdef WINDOWS
# include <winsock.h>
+# define SET_H_ERRNO(err) WSASetLastError (err)
#else
# include <sys/socket.h>
# include <netinet/in.h>
-#ifndef __BEOS__
-# include <arpa/inet.h>
-#endif
+# ifndef __BEOS__
+# include <arpa/inet.h>
+# endif
# include <netdb.h>
+# define SET_H_ERRNO(err) ((void)(h_errno = (err)))
#endif /* WINDOWS */
#ifndef NO_ADDRESS
extern int errno;
#endif
-/* Mapping between all known hosts to their addresses (n.n.n.n). */
+#ifndef h_errno
+# ifndef __CYGWIN__
+extern int h_errno;
+# endif
+#endif
-/* #### We should map to *lists* of IP addresses. */
+#ifdef ENABLE_IPV6
+int ip_default_family = AF_UNSPEC;
+#else
+int ip_default_family = AF_INET;
+#endif
+
+/* Mapping between known host names and lists of their addresses. */
-struct hash_table *host_name_address_map;
+static struct hash_table *host_name_addresses_map;
+\f
+/* Lists of addresses. This should eventually be extended to handle
+ IPv6. */
-/* The following two tables are obsolete, since we no longer do host
- canonicalization. */
+struct address_list {
+ int count; /* number of addresses */
+ ip_address *addresses; /* pointer to the array of COUNT addresses */
-/* Mapping between all known addresses (n.n.n.n) to their hosts. This
- is the inverse of host_name_address_map. These two tables share
- the strdup'ed strings. */
-struct hash_table *host_address_name_map;
+ int faulty; /* number of addresses known not to work;
+ they occupy positions 0..FAULTY-1. */
+ int from_cache; /* whether this entry was pulled from
+ cache or freshly looked up. */
-/* Mapping between auxilliary (slave) and master host names. */
-struct hash_table *host_slave_master_map;
+ int refcount; /* reference count; when it drops to
+ 0, the entry is freed. */
+};
-/* The same as gethostbyname, but supports internet addresses of the
- form `N.N.N.N'. On some systems gethostbyname() knows how to do
- this automatically. */
-struct hostent *
-ngethostbyname (const char *name)
+/* Get the bounds of the address list. *START receives the index of
+ the first address not marked faulty and *END receives the total
+ number of addresses, so usable positions are START <= pos < END. */
+
+void
+address_list_get_bounds (const struct address_list *al, int *start, int *end)
{
- struct hostent *hp;
- unsigned long addr;
+ *start = al->faulty;
+ *end = al->count;
+}
- addr = (unsigned long)inet_addr (name);
- if ((int)addr != -1)
- hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
- else
- hp = gethostbyname (name);
- return hp;
+/* Return whether this address list entry has been obtained from the
+ cache, i.e. the value of its from_cache flag (nonzero once
+ lookup_host has served AL from the host cache). */
+
+int
+address_list_cached_p (const struct address_list *al)
+{
+ return al->from_cache;
}
-/* Add host name HOST with the address ADDR_TEXT to the cache.
- Normally this means that the (HOST, ADDR_TEXT) pair will be to
- host_name_address_map and to host_address_name_map. (It is the
- caller's responsibility to make sure that HOST is not already in
- host_name_address_map.)
+/* Return a pointer to the address at position POS. */
- If the ADDR_TEXT has already been seen and belongs to another host,
- HOST will be added to host_slave_master_map instead. */
+const ip_address *
+address_list_address_at (const struct address_list *al, int pos)
+{
+ /* POS must lie in the usable range [faulty, count); addresses
+ before FAULTY are the ones already marked as not working. */
+ assert (pos >= al->faulty && pos < al->count);
+ return al->addresses + pos;
+}
-static void
-add_host_to_cache (const char *host, const char *addr_text)
+/* Check whether two address lists have all their IPs in common.
+ Returns 1 when AL1 and AL2 are the same list or hold equal
+ addresses at every position, and 0 otherwise. The comparison is
+ positional, so lists holding the same addresses in a different
+ order are reported as different. */
+
+int
+address_list_match_all (const struct address_list *al1,
+ const struct address_list *al2)
{
- char *canonical_name = hash_table_get (host_address_name_map, addr_text);
- if (canonical_name)
+#ifdef ENABLE_IPV6
+ int i;
+#endif
+ if (al1 == al2)
+ return 1;
+ if (al1->count != al2->count)
+ return 0;
+
+ /* For the comparison to be complete, we'd need to sort the IP
+ addresses first. But that's not necessary because this is only
+ used as an optimization. */
+
+#ifndef ENABLE_IPV6
+ /* In the non-IPv6 case, there is only one address type, so we can
+ compare the whole array with memcmp. */
+ return 0 == memcmp (al1->addresses, al2->addresses,
+ al1->count * sizeof (ip_address));
+#else /* ENABLE_IPV6 */
+ for (i = 0; i < al1->count; ++i)
{
- DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
- host, canonical_name));
- /* We've already dealt with that host under another name. */
- hash_table_put (host_slave_master_map,
- xstrdup_lower (host),
- xstrdup_lower (canonical_name));
+ const ip_address *ip1 = &al1->addresses[i];
+ const ip_address *ip2 = &al2->addresses[i];
+
+ /* Addresses of different families can never be equal. */
+ if (ip1->type != ip2->type)
+ return 0;
+
+ switch (ip1->type)
+ {
+ case IPV4_ADDRESS:
+ if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr
+ != ADDRESS_IPV4_IN_ADDR (ip2).s_addr)
+ return 0;
+ break;
+ case IPV6_ADDRESS:
+#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
+ /* Where scope ids are available, they must match as well. */
+ if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2))
+ return 0;
+#endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
+ if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1),
+ &ADDRESS_IPV6_IN6_ADDR (ip2)))
+ return 0;
+ break;
+ default:
+ /* An address type other than IPv4/IPv6 is a programming error. */
+ abort ();
+ }
}
- else
+ return 1;
+#endif /* ENABLE_IPV6 */
+}
+
+/* Record that the address at position INDEX of AL does not work, so
+   that the next traversal of the list begins after it.  */
+
+void
+address_list_set_faulty (struct address_list *al, int index)
+{
+  /* The list is assumed to be walked front to back, which means the
+     address being marked must be the first one not yet marked
+     faulty.  */
+  assert (index == al->faulty);
+
+  al->faulty += 1;
+
+  /* Once every address has failed, handing out an empty usable range
+     would help nobody; wipe the slate clean so that all addresses
+     get retried.  */
+  if (al->count <= al->faulty)
+    al->faulty = 0;
+}
+
+#ifdef ENABLE_IPV6
+/* Build an address_list from the addrinfo linked list AI.
+
+   Only AF_INET and AF_INET6 entries are copied; entries of any other
+   family are ignored.  Returns a newly allocated list whose
+   reference count is 1, or NULL when AI contains no usable entry.  */
+
+static struct address_list *
+address_list_from_addrinfo (const struct addrinfo *ai)
+{
+  struct address_list *al;
+  const struct addrinfo *node;
+  ip_address *out;
+  int usable = 0;
+
+  /* First pass: count the entries we know how to store.  */
+  for (node = ai; node; node = node->ai_next)
+    switch (node->ai_family)
+      {
+      case AF_INET:
+      case AF_INET6:
+        ++usable;
+        break;
+      default:
+        break;
+      }
+  if (!usable)
+    return NULL;
+
+  al = xnew0 (struct address_list);
+  al->addresses = xnew_array (ip_address, usable);
+  al->count = usable;
+  al->refcount = 1;
+
+  /* Second pass: copy each supported address, preserving the order
+     in which the resolver returned them.  */
+  out = al->addresses;
+  for (node = ai; node; node = node->ai_next)
+    {
+      if (node->ai_family == AF_INET)
+        {
+          const struct sockaddr_in *sa4 =
+            (const struct sockaddr_in *)node->ai_addr;
+          out->type = IPV4_ADDRESS;
+          ADDRESS_IPV4_IN_ADDR (out) = sa4->sin_addr;
+        }
+      else if (node->ai_family == AF_INET6)
+        {
+          const struct sockaddr_in6 *sa6 =
+            (const struct sockaddr_in6 *)node->ai_addr;
+          out->type = IPV6_ADDRESS;
+          ADDRESS_IPV6_IN6_ADDR (out) = sa6->sin6_addr;
+#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
+          ADDRESS_IPV6_SCOPE (out) = sa6->sin6_scope_id;
+#endif
+        }
+      else
+        continue;
+      ++out;
+    }
+
+  /* Both passes must agree on the number of entries.  */
+  assert (out - al->addresses == usable);
+  return al;
+}
+#else
+/* Create an address_list from a NULL-terminated vector of IPv4
+ addresses. This kind of vector is returned by gethostbyname.
+ The vector must hold at least one address; exactly 4 bytes are
+ copied from each entry. The returned list is newly allocated and
+ has a reference count of 1. */
+
+static struct address_list *
+address_list_from_ipv4_addresses (char **h_addr_list)
+{
+ int count, i;
+ struct address_list *al = xnew0 (struct address_list);
+
+ /* Count the entries up to the terminating NULL. */
+ count = 0;
+ while (h_addr_list[count])
+ ++count;
+ assert (count > 0);
+
+ al->addresses = xnew_array (ip_address, count);
+ al->count = count;
+ al->refcount = 1;
+
+ for (i = 0; i < count; i++)
{
- /* This is really the first time we're dealing with that host. */
- char *h_copy = xstrdup_lower (host);
- char *a_copy = xstrdup (addr_text);
- DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
- hash_table_put (host_name_address_map, h_copy, a_copy);
- hash_table_put (host_address_name_map, a_copy, h_copy);
+ ip_address *ip = &al->addresses[i];
+ ip->type = IPV4_ADDRESS;
+ memcpy (ADDRESS_IPV4_DATA (ip), h_addr_list[i], 4);
}
+
+ return al;
}
+#endif
-/* Store the address of HOSTNAME, internet-style (four octets in
- network order), to WHERE. First try to get the address from the
- cache; if it is not available, call the DNS functions and update
- the cache.
+/* Free AL together with its address array. The reference count is
+ not consulted here; callers decide when deletion is appropriate. */
+static void
+address_list_delete (struct address_list *al)
+{
+ xfree (al->addresses);
+ xfree (al);
+}
- Return 1 on successful finding of the hostname, 0 otherwise. */
-int
-store_hostaddress (unsigned char *where, const char *hostname)
+/* Drop one reference to AL. When the reference count reaches zero
+ (or goes below it), the list is freed and AL must not be used
+ again. */
+void
+address_list_release (struct address_list *al)
{
- unsigned long addr;
- char *addr_text;
- char *canonical_name;
+ --al->refcount;
+ DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
+ if (al->refcount <= 0)
+ {
+ DEBUGP (("Deleting unused %p.\n", al));
+ address_list_delete (al);
+ }
+}
+\f
+/* Versions of gethostbyname and getaddrinfo that support timeout. */
+
+#ifndef ENABLE_IPV6
+
+/* Argument/result bundle handed to gethostbyname_with_timeout_callback:
+ host_name is the input, hptr receives gethostbyname's result. */
+struct ghbnwt_context {
+ const char *host_name;
struct hostent *hptr;
- struct in_addr in;
- char *inet_s;
-
- /* If the address is of the form d.d.d.d, there will be no trouble
- with it. */
- addr = (unsigned long)inet_addr (hostname);
- /* If we have the numeric address, just store it. */
- if ((int)addr != -1)
+};
+
+/* Callback passed to run_with_timeout. ARG points to a struct
+ ghbnwt_context; the result of gethostbyname is stored in its hptr
+ member. */
+static void
+gethostbyname_with_timeout_callback (void *arg)
+{
+ struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
+ ctx->hptr = gethostbyname (ctx->host_name);
+}
+
+/* Just like gethostbyname, except it times out after TIMEOUT seconds.
+ In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
+ The function makes sure that when NULL is returned for reasons
+ other than timeout, errno is reset. On timeout h_errno is also
+ set, to HOST_NOT_FOUND. */
+
+static struct hostent *
+gethostbyname_with_timeout (const char *host_name, double timeout)
+{
+ struct ghbnwt_context ctx;
+ ctx.host_name = host_name;
+ /* A nonzero return from run_with_timeout is treated as a timeout;
+ ctx.hptr is not read in that case. */
+ if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
{
- /* ADDR is defined to be in network byte order, meaning the code
- works on little and big endian 32-bit architectures without
- change. On big endian 64-bit architectures we need to be
- careful to copy the correct four bytes. */
- int offset;
- have_addr:
-#ifdef WORDS_BIGENDIAN
- offset = sizeof (unsigned long) - 4;
-#else
- offset = 0;
-#endif
- memcpy (where, (char *)&addr + offset, 4);
- return 1;
+ SET_H_ERRNO (HOST_NOT_FOUND);
+ errno = ETIMEDOUT;
+ return NULL;
}
+ /* Clear errno on an ordinary failure so that a caller testing errno
+ against ETIMEDOUT does not mistake it for a timeout. */
+ if (!ctx.hptr)
+ errno = 0;
+ return ctx.hptr;
+}
+
+#else /* ENABLE_IPV6 */
+
+/* Argument/result bundle handed to getaddrinfo_with_timeout_callback.
+ The first four members are forwarded to getaddrinfo unchanged. */
+struct gaiwt_context {
+ const char *node;
+ const char *service;
+ const struct addrinfo *hints;
+ struct addrinfo **res;
+ int exit_code; /* return value of getaddrinfo */
+};
+
+/* Callback passed to run_with_timeout. ARG points to a struct
+ gaiwt_context; getaddrinfo's return value is stored in its
+ exit_code member. */
+static void
+getaddrinfo_with_timeout_callback (void *arg)
+{
+ struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
+ ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
+}
- /* By now we know that the address is not of the form d.d.d.d. Try
- to find it in our cache of host addresses. */
- addr_text = hash_table_get (host_name_address_map, hostname);
- if (addr_text)
+/* Just like getaddrinfo, except it times out after TIMEOUT seconds.
+ In case of timeout, the EAI_SYSTEM error code is returned and errno
+ is set to ETIMEDOUT. Otherwise the return value is whatever
+ getaddrinfo returned. */
+
+static int
+getaddrinfo_with_timeout (const char *node, const char *service,
+ const struct addrinfo *hints, struct addrinfo **res,
+ double timeout)
+{
+ struct gaiwt_context ctx;
+ ctx.node = node;
+ ctx.service = service;
+ ctx.hints = hints;
+ ctx.res = res;
+
+ /* A nonzero return from run_with_timeout is treated as a timeout;
+ ctx.exit_code is not read in that case. */
+ if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
{
- DEBUGP (("Found %s in host_name_address_map: %s\n",
- hostname, addr_text));
- addr = (unsigned long)inet_addr (addr_text);
- goto have_addr;
+ errno = ETIMEDOUT;
+ return EAI_SYSTEM;
}
+ return ctx.exit_code;
+}
- /* Maybe this host is known to us under another name. If so, we'll
- find it in host_slave_master_map, and use the master name to find
- its address in host_name_address_map. */
- canonical_name = hash_table_get (host_slave_master_map, hostname);
- if (canonical_name)
+#endif /* ENABLE_IPV6 */
+\f
+/* Pretty-print ADDR. When compiled without IPv6, this is the same as
+ inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
+ address.
+
+ The IPv6 text is written to a static buffer that the next IPv6
+ call overwrites, so copy the result if it must outlive that call.
+ NOTE(review): inet_ntoa is documented to return static storage as
+ well -- confirm before holding on to two results at once.
+ An address of any other type makes the function abort. */
+
+const char *
+pretty_print_address (const ip_address *addr)
+{
+ switch (addr->type)
{
- addr_text = hash_table_get (host_name_address_map, canonical_name);
- assert (addr_text != NULL);
- DEBUGP (("Found %s as slave of %s -> %s\n",
- hostname, canonical_name, addr_text));
- addr = (unsigned long)inet_addr (addr_text);
- goto have_addr;
+ case IPV4_ADDRESS:
+ return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
+#ifdef ENABLE_IPV6
+ case IPV6_ADDRESS:
+ {
+ static char buf[128];
+ inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
+#if 0
+#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
+ {
+ /* append "%SCOPE_ID" for all ?non-global? addresses */
+ char *p = buf + strlen (buf);
+ *p++ = '%';
+ number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
+ }
+#endif
+#endif
+ /* Make sure the buffer is terminated whatever was written. */
+ buf[sizeof (buf) - 1] = '\0';
+ return buf;
+ }
+#endif
}
+ /* Not reached for a known address type. */
+ abort ();
+ return NULL;
+}
- /* Since all else has failed, let's try gethostbyname(). Note that
- we use gethostbyname() rather than ngethostbyname(), because we
- already know that the address is not numerical. */
- hptr = gethostbyname (hostname);
- if (!hptr)
- return 0;
- /* Copy the address of the host to socket description. */
- memcpy (where, hptr->h_addr_list[0], hptr->h_length);
- assert (hptr->h_length == 4);
-
- /* Now that we've gone through the truoble of calling
- gethostbyname(), we can store this valuable information to the
- cache. First, we have to look for it by address to know if it's
- already in the cache by another name. */
- /* Originally, we copied to in.s_addr, but it appears to be missing
- on some systems. */
- memcpy (&in, *hptr->h_addr_list, sizeof (in));
- inet_s = inet_ntoa (in);
- add_host_to_cache (hostname, inet_s);
- return 1;
+/* Add host name HOST with the address list AL to the cache, creating
+ the cache table on first use. The cache takes its own reference
+ to AL and stores a lowercased copy of HOST as the key. */
+
+static void
+cache_host_lookup (const char *host, struct address_list *al)
+{
+ if (!host_name_addresses_map)
+ host_name_addresses_map = make_nocase_string_hash_table (0);
+
+ /* This reference belongs to the cache, not to the caller. */
+ ++al->refcount;
+ hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
+
+#ifdef ENABLE_DEBUG
+ if (opt.debug)
+ {
+ int i;
+ debug_logprintf ("Caching %s =>", host);
+ for (i = 0; i < al->count; i++)
+ debug_logprintf (" %s", pretty_print_address (al->addresses + i));
+ debug_logprintf ("\n");
+ }
+#endif
}
+/* Remove HOST from the host cache, if it is there, and drop the
+   reference the cache held on its address list.  Does nothing when
+   HOST is not cached or when nothing has been cached yet.  */
+
+void
+forget_host_lookup (const char *host)
+{
+  struct address_list *al;
+
+  /* The table is created lazily by cache_host_lookup, so it is still
+     NULL if no lookup has been cached so far.  */
+  if (!host_name_addresses_map)
+    return;
+
+  al = hash_table_get (host_name_addresses_map, host);
+  if (al)
+    {
+      address_list_release (al);
+      /* NOTE(review): the key was allocated with xstrdup_lower in
+         cache_host_lookup; whether hash_table_remove frees it is not
+         visible from here -- confirm it is not leaked.  */
+      hash_table_remove (host_name_addresses_map, host);
+    }
+}
+
+/* Look up HOST and return its addresses as an address_list, or NULL
+   if the lookup fails.
+
+   FLAGS is a bitwise OR of LH_* values.  LH_SILENT suppresses the
+   progress and failure messages.  When compiled with IPv6 support,
+   LH_IPV4_ONLY and LH_IPV6_ONLY restrict the address family (the
+   default is ip_default_family) and LH_PASSIVE adds AI_PASSIVE to
+   the getaddrinfo hints.
+
+   A numeric address is converted directly and is never cached.
+   Otherwise the host cache is consulted, and only then is the
+   resolver called, subject to opt.dns_timeout; a fresh result is
+   added to the cache when opt.dns_cache is set.
+
+   On success the caller holds one reference to the returned list
+   and gives it up with address_list_release.  */
+
+struct address_list *
+lookup_host (const char *host, int flags)
+{
+  struct address_list *al = NULL;
+
+#ifdef ENABLE_IPV6
+  int err, family;
+  struct addrinfo hints, *res;
+
+  /* Is this necessary?  Should this function be changed to accept a
+     FAMILY argument?  */
+  if (flags & LH_IPV4_ONLY)
+    family = AF_INET;
+  else if (flags & LH_IPV6_ONLY)
+    family = AF_INET6;
+  else
+    family = ip_default_family;
+#endif
+
+  /* First, try to check whether the address is already a numeric
+     address, in which case we don't need to cache it or bother with
+     setting up timeouts.  Plus, if memory serves me right, Ultrix's
+     gethostbyname can't handle numeric addresses (!).
+
+     Where getaddrinfo is available, we do it using the AI_NUMERICHOST
+     flag.  Without IPv6, we check whether inet_addr succeeds.  */
+
+#ifdef ENABLE_IPV6
+  xzero (hints);
+  hints.ai_family = family;
+  hints.ai_socktype = SOCK_STREAM;
+  hints.ai_flags = AI_NUMERICHOST;
+  if (flags & LH_PASSIVE)
+    /* OR the flag in rather than assigning it: overwriting ai_flags
+       would drop AI_NUMERICHOST and turn this purely local
+       conversion into a real DNS query made without a timeout.  */
+    hints.ai_flags |= AI_PASSIVE;
+
+  /* No need to call getaddrinfo_with_timeout here, as we're not
+     relying on the DNS, but only translating an address from
+     presentation (ASCII) to network format.  */
+  err = getaddrinfo (host, NULL, &hints, &res);
+  if (err == 0 && res != NULL)
+    {
+      al = address_list_from_addrinfo (res);
+      freeaddrinfo (res);
+      return al;
+    }
+#else
+  {
+    uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
+    if (addr_ipv4 != (uint32_t) -1)
+      {
+        /* The return value of inet_addr is in network byte order, so
+           we can just copy it to IP.  VEC mimics the NULL-terminated
+           h_addr_list vector that address_list_from_ipv4_addresses
+           expects.  */
+        char *vec[2];
+        vec[0] = (char *)&addr_ipv4;
+        vec[1] = NULL;
+        return address_list_from_ipv4_addresses (vec);
+      }
+  }
+#endif
+
+  /* Then, try to find the host in the cache.  */
+
+  if (host_name_addresses_map)
+    {
+      al = hash_table_get (host_name_addresses_map, host);
+      if (al)
+        {
+          DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
+          /* The caller gets a reference of its own.  */
+          ++al->refcount;
+          al->from_cache = 1;
+          return al;
+        }
+    }
+
+  if (!(flags & LH_SILENT))
+    logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
+
+  /* Host name lookup goes on below.  */
+
+#ifdef ENABLE_IPV6
+  {
+    xzero (hints);
+    hints.ai_family = family;
+    hints.ai_socktype = SOCK_STREAM;
+    if (flags & LH_PASSIVE)
+      hints.ai_flags = AI_PASSIVE;
+
+    err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
+
+    if (err != 0 || res == NULL)
+      {
+        if (!(flags & LH_SILENT))
+          logprintf (LOG_VERBOSE, _("failed: %s.\n"),
+                     err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
+        return NULL;
+      }
+    al = address_list_from_addrinfo (res);
+    freeaddrinfo (res);
+    if (!al)
+      {
+        /* The resolver answered, but with no AF_INET or AF_INET6
+           entry, so there is nothing to connect to.  Bail out here
+           instead of dereferencing a NULL list below.  */
+        if (!(flags & LH_SILENT))
+          logputs (LOG_VERBOSE,
+                   _("failed: No IPv4/IPv6 addresses for host.\n"));
+        return NULL;
+      }
+  }
+#else
+  {
+    struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
+    if (!hptr)
+      {
+        if (!(flags & LH_SILENT))
+          {
+            /* gethostbyname_with_timeout sets errno to ETIMEDOUT only
+               when the lookup timed out.  */
+            if (errno != ETIMEDOUT)
+              logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
+            else
+              logputs (LOG_VERBOSE, _("failed: timed out.\n"));
+          }
+        return NULL;
+      }
+    assert (hptr->h_length == 4);
+    /* Do older systems have h_addr_list?  */
+    al = address_list_from_ipv4_addresses (hptr->h_addr_list);
+  }
+#endif
+
+  /* Print the addresses determined by DNS lookup, but no more than
+     three.  */
+  if (!(flags & LH_SILENT))
+    {
+      int i;
+      int printmax = al->count <= 3 ? al->count : 3;
+      for (i = 0; i < printmax; i++)
+        {
+          logprintf (LOG_VERBOSE, "%s",
+                     pretty_print_address (al->addresses + i));
+          if (i < printmax - 1)
+            logputs (LOG_VERBOSE, ", ");
+        }
+      if (printmax != al->count)
+        logputs (LOG_VERBOSE, ", ...");
+      logputs (LOG_VERBOSE, "\n");
+    }
+
+  /* Cache the lookup information.  */
+  if (opt.dns_cache)
+    cache_host_lookup (host, al);
+
+  return al;
+}
+\f
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */
int
return _("Unknown error");
}
-void
-host_cleanup (void)
+/* hash_table_map callback used by host_cleanup: free one cache entry.
+ KEY is the host name string and VALUE its struct address_list; the
+ third argument is unused. Always returns 0.
+ NOTE(review): 0 presumably tells hash_table_map to keep iterating --
+ confirm against the hash table implementation. */
+static int
+host_cleanup_mapper (void *key, void *value, void *arg_ignored)
{
- /* host_name_address_map and host_address_name_map share the
- strings. Because of that, calling free_keys_and_values once
- suffices for both. */
- free_keys_and_values (host_name_address_map);
- hash_table_destroy (host_name_address_map);
- hash_table_destroy (host_address_name_map);
- free_keys_and_values (host_slave_master_map);
- hash_table_destroy (host_slave_master_map);
+ struct address_list *al;
+
+ xfree (key); /* host */
+
+ al = (struct address_list *)value;
+ /* Only the cache's own reference may remain at this point. */
+ assert (al->refcount == 1);
+ address_list_delete (al);
+
+ return 0;
}
+/* Free the host cache: every key and address list in it, then the
+ table itself. The table pointer is reset to NULL afterwards.
+ Does nothing if the cache was never created. */
void
-host_init (void)
+host_cleanup (void)
{
- host_name_address_map = make_string_hash_table (0);
- host_address_name_map = make_string_hash_table (0);
- host_slave_master_map = make_string_hash_table (0);
+ if (host_name_addresses_map)
+ {
+ hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
+ hash_table_destroy (host_name_addresses_map);
+ host_name_addresses_map = NULL;
+ }
}