-/* Dealing with host names.
- Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+/* Host name resolution and matching.
+ Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+along with Wget; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
+#ifndef WINDOWS
+#include <netdb.h>
+#endif
+
#include <stdio.h>
#include <stdlib.h>
-#include <ctype.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
#ifdef WINDOWS
# include <winsock.h>
+# define SET_H_ERRNO(err) WSASetLastError (err)
#else
# include <sys/socket.h>
# include <netinet/in.h>
-# include <arpa/inet.h>
+# ifndef __BEOS__
+# include <arpa/inet.h>
+# endif
# include <netdb.h>
+# define SET_H_ERRNO(err) ((void)(h_errno = (err)))
#endif /* WINDOWS */
-#ifdef HAVE_SYS_UTSNAME_H
-# include <sys/utsname.h>
+#ifndef NO_ADDRESS
+# define NO_ADDRESS NO_DATA
#endif
+
#include <errno.h>
#include "wget.h"
#include "utils.h"
#include "host.h"
#include "url.h"
+#include "hash.h"
+#include "connect.h" /* for socket_has_inet6 */
#ifndef errno
extern int errno;
#endif
-/* Host list entry */
-struct host
-{
- /* Host's symbolical name, as encountered at the time of first
- inclusion, e.g. "fly.cc.fer.hr". */
- char *hostname;
- /* Host's "real" name, i.e. its IP address, written out in ASCII
- form of N.N.N.N, e.g. "161.53.70.130". */
- char *realname;
- /* More than one HOSTNAME can correspond to the same REALNAME. For
- our purposes, the canonical name of the host is its HOSTNAME when
- it was first encountered. This entry is said to have QUALITY. */
- int quality;
- /* Next entry in the list. */
- struct host *next;
-};
+#ifndef h_errno
+# ifndef __CYGWIN__
+extern int h_errno;
+# endif
+#endif
-static struct host *hlist;
+/* Lists of IP addresses that result from running DNS queries. See
+ lookup_host for details. */
-static struct host *add_hlist PARAMS ((struct host *, const char *,
- const char *, int));
+struct address_list {
+ int count; /* number of addresses */
+ ip_address *addresses; /* pointer to the string of addresses */
-/* The same as gethostbyname, but supports internet addresses of the
- form `N.N.N.N'. */
-struct hostent *
-ngethostbyname (const char *name)
-{
- struct hostent *hp;
- unsigned long addr;
+ int faulty; /* number of addresses known not to work. */
+ int connected; /* whether we were able to connect to
+ one of the addresses in the list,
+ at least once. */
- addr = (unsigned long)inet_addr (name);
- if ((int)addr != -1)
- hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
- else
- hp = gethostbyname (name);
- return hp;
+ int refcount; /* reference count; when it drops to
+ 0, the entry is freed. */
+};
+
+/* Get the bounds of the address list. */
+
+void
+address_list_get_bounds (const struct address_list *al, int *start, int *end)
+{
+ *start = al->faulty;
+ *end = al->count;
}
-/* Search for HOST in the linked list L, by hostname. Return the
- entry, if found, or NULL. The search is case-insensitive. */
-static struct host *
-search_host (struct host *l, const char *host)
+/* Return a pointer to the address at position POS. */
+
+const ip_address *
+address_list_address_at (const struct address_list *al, int pos)
{
- for (; l; l = l->next)
- if (strcasecmp (l->hostname, host) == 0)
- return l;
- return NULL;
+ assert (pos >= al->faulty && pos < al->count);
+ return al->addresses + pos;
}
-/* Like search_host, but searches by address. */
-static struct host *
-search_address (struct host *l, const char *address)
+/* Return non-zero if AL contains IP, zero otherwise. */
+
+int
+address_list_contains (const struct address_list *al, const ip_address *ip)
{
- for (; l; l = l->next)
+ int i;
+ switch (ip->type)
{
- int cmp = strcmp (l->realname, address);
- if (cmp == 0)
- return l;
- else if (cmp > 0)
- return NULL;
+ case IPV4_ADDRESS:
+ for (i = 0; i < al->count; i++)
+ {
+ ip_address *cur = al->addresses + i;
+ if (cur->type == IPV4_ADDRESS
+ && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
+ ==
+ ADDRESS_IPV4_IN_ADDR (ip).s_addr))
+ return 1;
+ }
+ return 0;
+#ifdef ENABLE_IPV6
+ case IPV6_ADDRESS:
+ for (i = 0; i < al->count; i++)
+ {
+ ip_address *cur = al->addresses + i;
+ if (cur->type == IPV6_ADDRESS
+#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
+ && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
+#endif
+ && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
+ &ADDRESS_IPV6_IN6_ADDR (ip)))
+ return 1;
+ }
+ return 0;
+#endif /* ENABLE_IPV6 */
+ default:
+ abort ();
+ return 0;
}
- return NULL;
}
-/* Store the address of HOSTNAME, internet-style, to WHERE. First
- check for it in the host list, and (if not found), use
- ngethostbyname to get it.
+/* Mark the INDEXth element of AL as faulty, so that the next time
+ this address list is used, the faulty element will be skipped. */
+
+void
+address_list_set_faulty (struct address_list *al, int index)
+{
+ /* We assume that the address list is traversed in order, so that
+ every address before the one being marked is already faulty;
+ this is how Wget uses it. */
+ assert (index == al->faulty);
+
+ ++al->faulty;
+ if (al->faulty >= al->count)
+ /* All addresses have been proven faulty. Since there's not much
+ sense in returning the user an empty address list the next
+ time, we'll rather make them all clean, so that they can be
+ retried anew. */
+ al->faulty = 0;
+}
+
+/* Set the "connected" flag to true. This flag is used by connect.c
+ to see if the host perhaps needs to be resolved again. */
+
+void
+address_list_set_connected (struct address_list *al)
+{
+ al->connected = 1;
+}
+
+/* Return the value of the "connected" flag. */
- Return 1 on successful finding of the hostname, 0 otherwise. */
int
-store_hostaddress (unsigned char *where, const char *hostname)
+address_list_connected_p (const struct address_list *al)
{
- struct host *t;
- unsigned long addr;
- struct hostent *hptr;
- struct in_addr in;
- char *inet_s;
+ return al->connected;
+}
+
+#ifdef ENABLE_IPV6
+
+/* Create an address_list from the addresses in the given struct
+ addrinfo. */
+
+static struct address_list *
+address_list_from_addrinfo (const struct addrinfo *ai)
+{
+ struct address_list *al;
+ const struct addrinfo *ptr;
+ int cnt;
+ ip_address *ip;
+
+ cnt = 0;
+ for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
+ if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
+ ++cnt;
+ if (cnt == 0)
+ return NULL;
+
+ al = xnew0 (struct address_list);
+ al->addresses = xnew_array (ip_address, cnt);
+ al->count = cnt;
+ al->refcount = 1;
+
+ ip = al->addresses;
+ for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
+ if (ptr->ai_family == AF_INET6)
+ {
+ const struct sockaddr_in6 *sin6 =
+ (const struct sockaddr_in6 *)ptr->ai_addr;
+ ip->type = IPV6_ADDRESS;
+ ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
+#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
+ ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
+#endif
+ ++ip;
+ }
+ else if (ptr->ai_family == AF_INET)
+ {
+ const struct sockaddr_in *sin =
+ (const struct sockaddr_in *)ptr->ai_addr;
+ ip->type = IPV4_ADDRESS;
+ ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
+ ++ip;
+ }
+ assert (ip - al->addresses == cnt);
+ return al;
+}
+
+#else /* not ENABLE_IPV6 */
+
+/* Create an address_list from a NULL-terminated vector of IPv4
+ addresses. This kind of vector is returned by gethostbyname. */
+
+static struct address_list *
+address_list_from_ipv4_addresses (char **vec)
+{
+ int count, i;
+ struct address_list *al = xnew0 (struct address_list);
+
+ count = 0;
+ while (vec[count])
+ ++count;
+ assert (count > 0);
+
+ al->addresses = xnew_array (ip_address, count);
+ al->count = count;
+ al->refcount = 1;
- /* If the address is of the form d.d.d.d, there will be no trouble
- with it. */
- addr = (unsigned long)inet_addr (hostname);
- if ((int)addr == -1)
+ for (i = 0; i < count; i++)
{
- /* If it is not of that form, try to find it in the cache. */
- t = search_host (hlist, hostname);
- if (t)
- addr = (unsigned long)inet_addr (t->realname);
+ ip_address *ip = &al->addresses[i];
+ ip->type = IPV4_ADDRESS;
+ memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
}
- /* If we have the numeric address, just store it. */
- if ((int)addr != -1)
+
+ return al;
+}
+
+#endif /* not ENABLE_IPV6 */
+
+static void
+address_list_delete (struct address_list *al)
+{
+ xfree (al->addresses);
+ xfree (al);
+}
+
+/* Mark the address list as being no longer in use. This will reduce
+ its reference count which will cause the list to be freed when the
+ count reaches 0. */
+
+void
+address_list_release (struct address_list *al)
+{
+ --al->refcount;
+ DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
+ if (al->refcount <= 0)
{
- /* This works on both little and big endian architecture, as
- inet_addr returns the address in the proper order. It
- appears to work on 64-bit machines too. */
- memcpy (where, &addr, 4);
- return 1;
+ DEBUGP (("Deleting unused %p.\n", al));
+ address_list_delete (al);
}
- /* Since all else has failed, let's try gethostbyname(). Note that
- we use gethostbyname() rather than ngethostbyname(), because we
- *know* the address is not numerical. */
- hptr = gethostbyname (hostname);
- if (!hptr)
- return 0;
- /* Copy the address of the host to socket description. */
- memcpy (where, hptr->h_addr_list[0], hptr->h_length);
- /* Now that we're here, we could as well cache the hostname for
- future use, as in realhost(). First, we have to look for it by
- address to know if it's already in the cache by another name. */
-
- /* Originally, we copied to in.s_addr, but it appears to be missing
- on some systems. */
- memcpy (&in, *hptr->h_addr_list, sizeof (in));
- STRDUP_ALLOCA (inet_s, inet_ntoa (in));
- t = search_address (hlist, inet_s);
- if (t) /* Found in the list, as realname. */
+}
+\f
+/* Versions of gethostbyname and getaddrinfo that support timeout. */
+
+#ifndef ENABLE_IPV6
+
+struct ghbnwt_context {
+ const char *host_name;
+ struct hostent *hptr;
+};
+
+static void
+gethostbyname_with_timeout_callback (void *arg)
+{
+ struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
+ ctx->hptr = gethostbyname (ctx->host_name);
+}
+
+/* Just like gethostbyname, except it times out after TIMEOUT seconds.
+ In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
+ The function makes sure that when NULL is returned for reasons
+ other than timeout, errno is reset. */
+
+static struct hostent *
+gethostbyname_with_timeout (const char *host_name, double timeout)
+{
+ struct ghbnwt_context ctx;
+ ctx.host_name = host_name;
+ if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
{
- /* Set the default, 0 quality. */
- hlist = add_hlist (hlist, hostname, inet_s, 0);
- return 1;
+ SET_H_ERRNO (HOST_NOT_FOUND);
+ errno = ETIMEDOUT;
+ return NULL;
}
- /* Since this is really the first time this host is encountered,
- set quality to 1. */
- hlist = add_hlist (hlist, hostname, inet_s, 1);
- return 1;
+ if (!ctx.hptr)
+ errno = 0;
+ return ctx.hptr;
+}
+
+/* Print error messages for host errors. */
+static char *
+host_errstr (int error)
+{
+ /* Can't use switch since some of these constants can be equal,
+ which makes the compiler complain about duplicate case
+ values. */
+ if (error == HOST_NOT_FOUND
+ || error == NO_RECOVERY
+ || error == NO_DATA
+ || error == NO_ADDRESS)
+ return _("Unknown host");
+ else if (error == TRY_AGAIN)
+ /* Message modeled after what gai_strerror returns in similar
+ circumstances. */
+ return _("Temporary failure in name resolution");
+ else
+ return _("Unknown error");
+}
+
+#else /* ENABLE_IPV6 */
+
+struct gaiwt_context {
+ const char *node;
+ const char *service;
+ const struct addrinfo *hints;
+ struct addrinfo **res;
+ int exit_code;
+};
+
+static void
+getaddrinfo_with_timeout_callback (void *arg)
+{
+ struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
+ ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
}
-/* Add a host to the host list. The list is sorted by addresses. For
- equal addresses, the entries with quality should bubble towards the
- beginning of the list. */
-static struct host *
-add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
+/* Just like getaddrinfo, except it times out after TIMEOUT seconds.
+ In case of timeout, the EAI_SYSTEM error code is returned and errno
+ is set to ETIMEDOUT. */
+
+static int
+getaddrinfo_with_timeout (const char *node, const char *service,
+ const struct addrinfo *hints, struct addrinfo **res,
+ double timeout)
{
- struct host *t, *old, *beg;
+ struct gaiwt_context ctx;
+ ctx.node = node;
+ ctx.service = service;
+ ctx.hints = hints;
+ ctx.res = res;
- /* The entry goes to the beginning of the list if the list is empty
- or the order requires it. */
- if (!l || (strcmp (nreal, l->realname) < 0))
+ if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
{
- t = (struct host *)xmalloc (sizeof (struct host));
- t->hostname = xstrdup (nhost);
- t->realname = xstrdup (nreal);
- t->quality = quality;
- t->next = l;
- return t;
+ errno = ETIMEDOUT;
+ return EAI_SYSTEM;
}
+ return ctx.exit_code;
+}
+
+#endif /* ENABLE_IPV6 */
+\f
+/* Pretty-print ADDR. When compiled without IPv6, this is the same as
+ inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
+ address. */
- beg = l;
- /* Second two one-before-the-last element. */
- while (l->next)
+const char *
+pretty_print_address (const ip_address *addr)
+{
+ switch (addr->type)
{
- int cmp;
- old = l;
- l = l->next;
- cmp = strcmp (nreal, l->realname);
- if (cmp >= 0)
- continue;
- /* If the next list element is greater than s, put s between the
- current and the next list element. */
- t = (struct host *)xmalloc (sizeof (struct host));
- old->next = t;
- t->next = l;
- t->hostname = xstrdup (nhost);
- t->realname = xstrdup (nreal);
- t->quality = quality;
- return beg;
+ case IPV4_ADDRESS:
+ return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
+#ifdef ENABLE_IPV6
+ case IPV6_ADDRESS:
+ {
+ static char buf[128];
+ inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
+#if 0
+#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
+ {
+ /* append "%SCOPE_ID" for all ?non-global? addresses */
+ char *p = buf + strlen (buf);
+ *p++ = '%';
+ number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
+ }
+#endif
+#endif
+ buf[sizeof (buf) - 1] = '\0';
+ return buf;
+ }
+#endif
}
- t = (struct host *)xmalloc (sizeof (struct host));
- t->hostname = xstrdup (nhost);
- t->realname = xstrdup (nreal);
- t->quality = quality;
- /* Insert the new element after the last element. */
- l->next = t;
- t->next = NULL;
- return beg;
+ abort ();
+ return NULL;
}
-/* Determine the "real" name of HOST, as perceived by Wget. If HOST
- is referenced by more than one name, "real" name is considered to
- be the first one encountered in the past.
+/* The following two functions were adapted from glibc. */
- If the host cannot be found in the list of already dealt-with
- hosts, try with its INET address. If this fails too, add it to the
- list. The routine does not call gethostbyname twice for the same
- host if it can possibly avoid it. */
-char *
-realhost (const char *host)
+static int
+is_valid_ipv4_address (const char *str, const char *end)
{
- struct host *l;
- struct in_addr in;
- struct hostent *hptr;
- char *inet_s;
+ int saw_digit = 0;
+ int octets = 0;
+ int val = 0;
+
+ while (str < end)
+ {
+ int ch = *str++;
+
+ if (ch >= '0' && ch <= '9')
+ {
+ val = val * 10 + (ch - '0');
+
+ if (val > 255)
+ return 0;
+ if (saw_digit == 0)
+ {
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ }
+ else if (ch == '.' && saw_digit == 1)
+ {
+ if (octets == 4)
+ return 0;
+ val = 0;
+ saw_digit = 0;
+ }
+ else
+ return 0;
+ }
+ if (octets < 4)
+ return 0;
+
+ return 1;
+}
- DEBUGP (("Checking for %s.\n", host));
- /* Look for the host, looking by the host name. */
- l = search_host (hlist, host);
- if (l && l->quality) /* Found it with quality */
+int
+is_valid_ipv6_address (const char *str, const char *end)
+{
+ enum {
+ NS_INADDRSZ = 4,
+ NS_IN6ADDRSZ = 16,
+ NS_INT16SZ = 2
+ };
+
+ const char *curtok;
+ int tp;
+ const char *colonp;
+ int saw_xdigit;
+ unsigned int val;
+
+ tp = 0;
+ colonp = NULL;
+
+ if (str == end)
+ return 0;
+
+ /* Leading :: requires some special handling. */
+ if (*str == ':')
{
- DEBUGP (("%s was already used, by that name.\n", host));
- /* Here we return l->hostname, not host, because of the possible
- case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
- the same, but we want the one that was first. */
- return xstrdup (l->hostname);
+ ++str;
+ if (str == end || *str != ':')
+ return 0;
}
- else if (!l) /* Not found, with or without quality */
+
+ curtok = str;
+ saw_xdigit = 0;
+ val = 0;
+
+ while (str < end)
{
- /* The fact that gethostbyname will get called makes it
- necessary to store it to the list, to ensure that
- gethostbyname will not be called twice for the same string.
- However, the quality argument must be set appropriately.
-
- Note that add_hlist must be called *after* the realname
- search, or the quality would be always set to 0 */
- DEBUGP (("This is the first time I hear about host %s by that name.\n",
- host));
- hptr = ngethostbyname (host);
- if (!hptr)
- return xstrdup (host);
- /* Originally, we copied to in.s_addr, but it appears to be
- missing on some systems. */
- memcpy (&in, *hptr->h_addr_list, sizeof (in));
- STRDUP_ALLOCA (inet_s, inet_ntoa (in));
+ int ch = *str++;
+
+ /* if ch is a number, add it to val. */
+ if (ISXDIGIT (ch))
+ {
+ val <<= 4;
+ val |= XDIGIT_TO_NUM (ch);
+ if (val > 0xffff)
+ return 0;
+ saw_xdigit = 1;
+ continue;
+ }
+
+ /* if ch is a colon ... */
+ if (ch == ':')
+ {
+ curtok = str;
+ if (saw_xdigit == 0)
+ {
+ if (colonp != NULL)
+ return 0;
+ colonp = str + tp;
+ continue;
+ }
+ else if (str == end)
+ return 0;
+ if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
+ return 0;
+ tp += NS_INT16SZ;
+ saw_xdigit = 0;
+ val = 0;
+ continue;
+ }
+
+ /* if ch is a dot ... */
+ if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ)
+ && is_valid_ipv4_address (curtok, end) == 1)
+ {
+ tp += NS_INADDRSZ;
+ saw_xdigit = 0;
+ break;
+ }
+
+ return 0;
}
- else /* Found, without quality */
+
+ if (saw_xdigit == 1)
{
- /* This case happens when host is on the list,
- but not as first entry (the one with quality).
- Then we just get its INET address and pick
- up the first entry with quality. */
- DEBUGP (("We've dealt with host %s, but under the name %s.\n",
- host, l->realname));
- STRDUP_ALLOCA (inet_s, l->realname);
+ if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
+ return 0;
+ tp += NS_INT16SZ;
}
- /* Now we certainly have the INET address. The following loop is
- guaranteed to pick either an entry with quality (because it is
- the first one), or none at all. */
- l = search_address (hlist, inet_s);
- if (l) /* Found in the list, as realname. */
+ if (colonp != NULL)
{
- /* Set the default, 0 quality. */
- hlist = add_hlist (hlist, host, inet_s, 0);
- return xstrdup (l->hostname);
+ if (tp == NS_IN6ADDRSZ)
+ return 0;
+ tp = NS_IN6ADDRSZ;
}
- /* Since this is really the first time this host is encountered,
- set quality to 1. */
- hlist = add_hlist (hlist, host, inet_s, 1);
- return xstrdup (host);
+
+ if (tp != NS_IN6ADDRSZ)
+ return 0;
+
+ return 1;
}
+\f
+/* Simple host cache, used by lookup_host to speed up resolving. The
+ cache doesn't handle TTL because Wget is a fairly short-lived
+ application. Refreshing is attempted when connect fails, though --
+ see connect_to_host. */
-/* Compare two hostnames (out of URL-s if the arguments are URL-s),
- taking care of aliases. It uses realhost() to determine a unique
- hostname for each of two hosts. If simple_check is non-zero, only
- strcmp() is used for comparison. */
-int
-same_host (const char *u1, const char *u2)
+/* Mapping between known hosts and lists of their addresses. */
+static struct hash_table *host_name_addresses_map;
+
+
+/* Return the host's resolved addresses from the cache, if
+ available. */
+
+static struct address_list *
+cache_query (const char *host)
{
- const char *s;
- char *p1, *p2;
- char *real1, *real2;
-
- /* Skip protocol, if present. */
- u1 += skip_url (u1);
- u2 += skip_url (u2);
- u1 += skip_proto (u1);
- u2 += skip_proto (u2);
-
- /* Skip username ans password, if present. */
- u1 += skip_uname (u1);
- u2 += skip_uname (u2);
-
- for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
- p1 = strdupdelim (s, u1);
- for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
- p2 = strdupdelim (s, u2);
- DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
- if (strcasecmp (p1, p2) == 0)
+ struct address_list *al;
+ if (!host_name_addresses_map)
+ return NULL;
+ al = hash_table_get (host_name_addresses_map, host);
+ if (al)
{
- free (p1);
- free (p2);
- DEBUGP (("They are quite alike.\n"));
- return 1;
+ DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
+ ++al->refcount;
+ return al;
}
- else if (opt.simple_check)
+ return NULL;
+}
+
+/* Cache the DNS lookup of HOST. Subsequent invocations of
+ lookup_host will return the cached value. */
+
+static void
+cache_store (const char *host, struct address_list *al)
+{
+ if (!host_name_addresses_map)
+ host_name_addresses_map = make_nocase_string_hash_table (0);
+
+ ++al->refcount;
+ hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
+
+#ifdef ENABLE_DEBUG
+ if (opt.debug)
{
- free (p1);
- free (p2);
- DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
- return 0;
+ int i;
+ debug_logprintf ("Caching %s =>", host);
+ for (i = 0; i < al->count; i++)
+ debug_logprintf (" %s", pretty_print_address (al->addresses + i));
+ debug_logprintf ("\n");
}
- real1 = realhost (p1);
- real2 = realhost (p2);
- free (p1);
- free (p2);
- if (strcasecmp (real1, real2) == 0)
+#endif
+}
+
+/* Remove HOST from the DNS cache. Does nothing if HOST is not in
+ the cache. */
+
+static void
+cache_remove (const char *host)
+{
+ struct address_list *al;
+ if (!host_name_addresses_map)
+ return;
+ al = hash_table_get (host_name_addresses_map, host);
+ if (al)
{
- DEBUGP (("They are alike, after realhost()->%s.\n", real1));
- free (real1);
- free (real2);
- return 1;
+ address_list_release (al);
+ hash_table_remove (host_name_addresses_map, host);
}
- else
+}
+\f
+/* Look up HOST in DNS and return a list of IP addresses. The
+ addresses in the list are in the same order in which
+ gethostbyname/getaddrinfo returned them.
+
+ This function caches its result so that, if the same host is passed
+ a second time, the addresses are returned without a DNS lookup.
+ (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to
+ globally disable caching.)
+
+ FLAGS can be a combination of:
+ LH_SILENT - don't print the "resolving ... done" messages.
+ LH_BIND - resolve addresses for use with bind, which under
+ IPv6 means to use AI_PASSIVE flag to getaddrinfo.
+ Passive lookups are not cached under IPv6.
+ LH_REFRESH - if HOST is cached, remove the entry from the cache
+ and resolve it anew. */
+
+struct address_list *
+lookup_host (const char *host, int flags)
+{
+ struct address_list *al;
+ int silent = flags & LH_SILENT;
+ int use_cache;
+ int numeric_address = 0;
+ double timeout = opt.dns_timeout;
+
+#ifndef ENABLE_IPV6
+ /* If we're not using getaddrinfo, first check if HOST specifies a
+ numeric IPv4 address. Some implementations of gethostbyname
+ (e.g. the Ultrix one and possibly Winsock) don't accept
+ dotted-decimal IPv4 addresses. */
+ {
+ uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
+ if (addr_ipv4 != (uint32_t) -1)
+ {
+ /* No need to cache host->addr relation, just return the
+ address. */
+ char *vec[2];
+ vec[0] = (char *)&addr_ipv4;
+ vec[1] = NULL;
+ return address_list_from_ipv4_addresses (vec);
+ }
+ }
+#else /* ENABLE_IPV6 */
+ /* If we're using getaddrinfo, at least check whether the address is
+ already numeric, in which case there is no need to print the
+ "Resolving..." output. (This comes at no additional cost since
+ the is_valid_ipv*_address functions are already required for
+ url_parse.) */
+ {
+ const char *end = host + strlen (host);
+ if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end))
+ numeric_address = 1;
+ }
+#endif
+
+ /* Cache is normally on, but can be turned off with --no-dns-cache.
+ Don't cache passive lookups under IPv6. */
+ use_cache = opt.dns_cache;
+#ifdef ENABLE_IPV6
+ if ((flags & LH_BIND) || numeric_address)
+ use_cache = 0;
+#endif
+
+ /* Try to find the host in the cache so we don't need to talk to the
+ resolver. If LH_REFRESH is requested, remove HOST from the cache
+ instead. */
+ if (use_cache)
{
- DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
- free (real1);
- free (real2);
- return 0;
+ if (!(flags & LH_REFRESH))
+ {
+ al = cache_query (host);
+ if (al)
+ return al;
+ }
+ else
+ cache_remove (host);
}
-}
+ /* No luck with the cache; resolve HOST. */
+
+ if (!silent && !numeric_address)
+ logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
+
+#ifdef ENABLE_IPV6
+ {
+ int err;
+ struct addrinfo hints, *res;
+
+ xzero (hints);
+ hints.ai_socktype = SOCK_STREAM;
+ if (opt.ipv4_only)
+ hints.ai_family = AF_INET;
+ else if (opt.ipv6_only)
+ hints.ai_family = AF_INET6;
+ else
+ {
+ hints.ai_family = AF_UNSPEC;
+#ifdef AI_ADDRCONFIG
+ hints.ai_flags |= AI_ADDRCONFIG;
+#else
+ /* On systems without AI_ADDRCONFIG, emulate it by manually
+ checking whether the system supports IPv6 sockets. */
+ if (!socket_has_inet6 ())
+ hints.ai_family = AF_INET;
+#endif
+ }
+ if (flags & LH_BIND)
+ hints.ai_flags |= AI_PASSIVE;
+
+#ifdef AI_NUMERICHOST
+ if (numeric_address)
+ {
+ /* Where available, the AI_NUMERICHOST hint can prevent costly
+ access to DNS servers. */
+ hints.ai_flags |= AI_NUMERICHOST;
+ timeout = 0; /* no timeout needed when "resolving"
+ numeric hosts -- avoid setting up
+ signal handlers and such. */
+ }
+#endif
+
+ err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout);
+ if (err != 0 || res == NULL)
+ {
+ if (!silent)
+ logprintf (LOG_VERBOSE, _("failed: %s.\n"),
+ err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
+ return NULL;
+ }
+ al = address_list_from_addrinfo (res);
+ freeaddrinfo (res);
+ if (!al)
+ {
+ logprintf (LOG_VERBOSE,
+ _("failed: No IPv4/IPv6 addresses for host.\n"));
+ return NULL;
+ }
+ }
+#else /* not ENABLE_IPV6 */
+ {
+ struct hostent *hptr = gethostbyname_with_timeout (host, timeout);
+ if (!hptr)
+ {
+ if (!silent)
+ {
+ if (errno != ETIMEDOUT)
+ logprintf (LOG_VERBOSE, _("failed: %s.\n"),
+ host_errstr (h_errno));
+ else
+ logputs (LOG_VERBOSE, _("failed: timed out.\n"));
+ }
+ return NULL;
+ }
+ /* Do older systems have h_addr_list? */
+ al = address_list_from_ipv4_addresses (hptr->h_addr_list);
+ }
+#endif /* not ENABLE_IPV6 */
+
+ /* Print the addresses determined by DNS lookup, but no more than
+ three. */
+ if (!silent && !numeric_address)
+ {
+ int i;
+ int printmax = al->count <= 3 ? al->count : 3;
+ for (i = 0; i < printmax; i++)
+ {
+ logprintf (LOG_VERBOSE, "%s",
+ pretty_print_address (al->addresses + i));
+ if (i < printmax - 1)
+ logputs (LOG_VERBOSE, ", ");
+ }
+ if (printmax != al->count)
+ logputs (LOG_VERBOSE, ", ...");
+ logputs (LOG_VERBOSE, "\n");
+ }
+
+ /* Cache the lookup information. */
+ if (use_cache)
+ cache_store (host, al);
+
+ return al;
+}
+\f
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */
int
-accept_domain (struct urlinfo *u)
+accept_domain (struct url *u)
{
assert (u->host != NULL);
if (opt.domains)
for (i = 0; list[i]; i++)
{
for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
- if (tolower (list[i][j]) != tolower (what[k]))
+ if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
break;
/* The domain must be first to reach to beginning. */
if (j == -1)
return 0;
}
-/* Return email address of the form username@FQDN suitable for
- anonymous FTP passwords. This process is error-prone, and the
- escape hatch is the MY_HOST preprocessor constant, which can be
- used to hard-code either your hostname or FQDN at compile-time.
-
- If the FQDN cannot be determined, a warning is printed, and the
- function returns a short `username@' form, accepted by most
- anonymous servers.
-
- If not even the username cannot be divined, it means things are
- seriously fucked up, and Wget exits. */
-char *
-ftp_getaddress (void)
+static int
+host_cleanup_mapper (void *key, void *value, void *arg_ignored)
{
- static char *address;
+ struct address_list *al;
- /* Do the drill only the first time, as it won't change. */
- if (!address)
- {
- char userid[32]; /* 9 should be enough for Unix, but
- I'd rather be on the safe side. */
- char *host, *fqdn;
+ xfree (key); /* host */
- if (!pwd_cuserid (userid))
- {
- logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
- exec_name);
- exit (1);
- }
-#ifdef MY_HOST
- STRDUP_ALLOCA (host, MY_HOST);
-#else /* not MY_HOST */
-#ifdef HAVE_UNAME
- {
- struct utsname ubuf;
- if (uname (&ubuf) < 0)
- {
- logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
- exec_name, strerror (errno));
- fqdn = "";
- goto giveup;
- }
- STRDUP_ALLOCA (host, ubuf.nodename);
- }
-#else /* not HAVE_UNAME */
-#ifdef HAVE_GETHOSTNAME
- host = alloca (256);
- if (gethostname (host, 256) < 0)
- {
- logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
- exec_name);
- fqdn = "";
- goto giveup;
- }
-#else /* not HAVE_GETHOSTNAME */
- #error Cannot determine host name.
-#endif /* not HAVE_GETHOSTNAME */
-#endif /* not HAVE_UNAME */
-#endif /* not MY_HOST */
- /* If the address we got so far contains a period, don't bother
- anymore. */
- if (strchr (host, '.'))
- fqdn = host;
- else
- {
- /* #### I've seen the following scheme fail on at least one
- system! Do we care? */
- char *tmpstore;
- /* According to Richard Stevens, the correct way to find the
- FQDN is to (1) find the host name, (2) find its IP
- address using gethostbyname(), and (3) get the FQDN using
- gethostbyaddr(). So that's what we'll do. Step one has
- been done above. */
- /* (2) */
- struct hostent *hp = gethostbyname (host);
- if (!hp || !hp->h_addr_list)
- {
- logprintf (LOG_ALWAYS, _("\
-%s: Warning: cannot determine local IP address.\n"),
- exec_name);
- fqdn = "";
- goto giveup;
- }
- /* Copy the argument, so the call to gethostbyaddr doesn't
- clobber it -- just in case. */
- tmpstore = (char *)alloca (hp->h_length);
- memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
- /* (3) */
- hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
- if (!hp || !hp->h_name)
- {
- logprintf (LOG_ALWAYS, _("\
-%s: Warning: cannot reverse-lookup local IP address.\n"),
- exec_name);
- fqdn = "";
- goto giveup;
- }
- if (!strchr (hp->h_name, '.'))
- {
-#if 0
- /* This gets ticked pretty often. Karl Berry reports
- that there can be valid reasons for the local host
- name not to be an FQDN, so I've decided to remove the
- annoying warning. */
- logprintf (LOG_ALWAYS, _("\
-%s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
- exec_name);
-#endif
- fqdn = "";
- goto giveup;
- }
- /* Once we're here, hp->h_name contains the correct FQDN. */
- STRDUP_ALLOCA (fqdn, hp->h_name);
- }
- giveup:
- address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
- sprintf (address, "%s@%s", userid, fqdn);
- }
- return address;
-}
+ al = (struct address_list *)value;
+ assert (al->refcount == 1);
+ address_list_delete (al);
-/* Print error messages for host errors. */
-char *
-herrmsg (int error)
-{
- /* Can't use switch since some constants are equal (at least on my
- system), and the compiler signals "duplicate case value". */
- if (error == HOST_NOT_FOUND
- || error == NO_RECOVERY
- || error == NO_DATA
- || error == NO_ADDRESS
- || error == TRY_AGAIN)
- return _("Host not found");
- else
- return _("Unknown error");
+ return 0;
}
-/* Clean the host list. This is a separate function, so we needn't
- export HLIST and its implementation. Ha! */
void
-clean_hosts (void)
+host_cleanup (void)
{
- struct host *l = hlist;
-
- while (l)
+ if (host_name_addresses_map)
{
- struct host *p = l->next;
- free (l->hostname);
- free (l->realname);
- free (l);
- l = p;
+ hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
+ hash_table_destroy (host_name_addresses_map);
+ host_name_addresses_map = NULL;
}
- hlist = NULL;
}