1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError(err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 #define NO_ADDRESS NO_DATA
63 #ifdef HAVE_SYS_UTSNAME_H
64 # include <sys/utsname.h>
85 int ip_default_family = AF_INET6;
87 int ip_default_family = AF_INET;
90 /* Mapping between known hosts and lists of their addresses. */
92 static struct hash_table *host_name_addresses_map;
94 /* Lists of addresses. This should eventually be extended to handle
98 int count; /* number of adrresses */
99 ip_address *addresses; /* pointer to the string of addresses */
101 int faulty; /* number of addresses known not to work. */
102 int refcount; /* so we know whether to free it or not. */
105 /* Get the bounds of the address list. */
108 address_list_get_bounds (struct address_list *al, int *start, int *end)
114 /* Copy address number INDEX to IP_STORE. */
117 address_list_copy_one (struct address_list *al, int index, ip_address *ip_store)
119 assert (index >= al->faulty && index < al->count);
120 memcpy (ip_store, al->addresses + index, sizeof (ip_address));
123 /* Check whether two address lists have all their IPs in common. */
126 address_list_match_all (struct address_list *al1, struct address_list *al2)
130 if (al1->count != al2->count)
132 return 0 == memcmp (al1->addresses, al2->addresses,
133 al1->count * sizeof (ip_address));
136 /* Mark the INDEXth element of AL as faulty, so that the next time
137 this address list is used, the faulty element will be skipped. */
140 address_list_set_faulty (struct address_list *al, int index)
142 /* We assume that the address list is traversed in order, so that a
143 "faulty" attempt is always preceded only by addresses already marked
144 faulty; this is how Wget uses it. */
145 assert (index == al->faulty);
148 if (al->faulty >= al->count)
149 /* All addresses have been proven faulty. Since there's not much
150 sense in returning the user an empty address list the next
151 time, we'll rather make them all clean, so that they can be
156 #ifdef HAVE_GETADDRINFO
158 * address_list_from_addrinfo
160 * This function transforms an addrinfo linked list into an address_list.
163 * addrinfo* Linked list of addrinfo
166 * address_list* Newly allocated address_list
168 static struct address_list *
169 address_list_from_addrinfo (struct addrinfo *ai)
171 struct address_list *al;
172 struct addrinfo *ai_head = ai;
176 for (ai = ai_head; ai; ai = ai->ai_next)
177 if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6)
182 al = xmalloc (sizeof (struct address_list));
183 al->addresses = xmalloc (cnt * sizeof (ip_address));
188 for (i = 0, ai = ai_head; ai; ai = ai->ai_next)
189 if (ai->ai_family == AF_INET6)
191 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ai->ai_addr;
192 memcpy (al->addresses + i, &sin6->sin6_addr, 16);
195 else if (ai->ai_family == AF_INET)
197 struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr;
198 map_ipv4_to_ip ((ip4_address *)&sin->sin_addr, al->addresses + i);
205 /* Create an address_list out of a NULL-terminated vector of
206 addresses, as returned by gethostbyname. */
207 static struct address_list *
208 address_list_from_vector (char **h_addr_list)
212 struct address_list *al = xmalloc (sizeof (struct address_list));
214 while (h_addr_list[count])
219 al->addresses = xmalloc (count * sizeof (ip_address));
222 for (i = 0; i < count; i++)
223 map_ipv4_to_ip ((ip4_address *)h_addr_list[i], al->addresses + i);
229 /* Like address_list_from_vector, but initialized with a single
232 static struct address_list *
233 address_list_from_single (ip_address *addr)
235 struct address_list *al = xmalloc (sizeof (struct address_list));
238 al->addresses = xmalloc (sizeof (ip_address));
240 memcpy (al->addresses, addr, sizeof (ip_address));
246 address_list_delete (struct address_list *al)
248 xfree (al->addresses);
253 address_list_release (struct address_list *al)
256 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
257 if (al->refcount <= 0)
259 DEBUGP (("Deleting unused %p.\n", al));
260 address_list_delete (al);
265 * wget_sockaddr_set_address
267 * This function takes a wget_sockaddr and fills in the protocol type,
268 * the port number and the address, where NULL in address means wildcard.
269 * An unsupported address family will abort the whole program.
272 * wget_sockaddr* The space to be filled
273 * int The wished protocol
274 * unsigned short The port
275 * const ip_address The binary IP address
278 * - Only modifies the 1st param
281 wget_sockaddr_set_address (wget_sockaddr *sa,
282 int ip_family, unsigned short port, ip_address *addr)
284 if (ip_family == AF_INET)
286 sa->sin.sin_family = ip_family;
287 sa->sin.sin_port = htons (port);
289 memset (&sa->sin.sin_addr, 0, sizeof(ip4_address));
293 if (!map_ip_to_ipv4 (addr, &addr4))
294 /* should the callers have prevented this? */
296 memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address));
301 if (ip_family == AF_INET6)
303 sa->sin6.sin6_family = ip_family;
304 sa->sin6.sin6_port = htons (port);
306 memset (&sa->sin6.sin6_addr, 0 , 16);
308 memcpy (&sa->sin6.sin6_addr, addr, 16);
316 * wget_sockaddr_set_port
318 * This function only fills in the port of the socket information.
319 * If the protocol is not supported nothing is done.
320 * Unsupported address family will abort the whole program.
323 * that the IP-Protocol already is set.
326 * wget_sockaddr* The space where the port should be entered
327 * unsigned short The port that should be entered, in host order
330 * - Only modifies the 1st param
333 wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port)
335 if (sa->sa.sa_family == AF_INET)
337 sa->sin.sin_port = htons (port);
341 if (sa->sa.sa_family == AF_INET6)
343 sa->sin6.sin6_port = htons (port);
351 * wget_sockaddr_get_addr
353 * This function returns the address from a sockaddr as a byte string.
354 * Unsupported address family will abort the whole program.
357 * that the IP-Protocol already is set.
360 * wget_sockaddr* Socket Information
363 * unsigned char * IP address as byte string.
366 wget_sockaddr_get_addr (wget_sockaddr *sa)
368 if (sa->sa.sa_family == AF_INET)
369 return &sa->sin.sin_addr;
371 if (sa->sa.sa_family == AF_INET6)
372 return &sa->sin6.sin6_addr;
380 * wget_sockaddr_get_port
382 * This function only returns the port from the input structure.
383 * Unsupported address family will abort the whole program.
386 * that the IP-Protocol already is set.
389 * wget_sockaddr* Information where to get the port
392 * unsigned short Port Number in host order.
395 wget_sockaddr_get_port (const wget_sockaddr *sa)
397 if (sa->sa.sa_family == AF_INET)
398 return htons (sa->sin.sin_port);
400 if (sa->sa.sa_family == AF_INET6)
401 return htons (sa->sin6.sin6_port);
404 /* do not complain about return nothing */
411 * This function returns the length of the sockaddr corresponding to
412 * the currently preferred protocol (for bind, connect, etc.).
413 * Unsupported address family will abort the whole program.
416 * that the IP-Protocol already is set.
419 * - Public IP-Family Information
422 * int structure length for socket options
427 if (ip_default_family == AF_INET)
428 return sizeof (struct sockaddr_in);
430 if (ip_default_family == AF_INET6)
431 return sizeof (struct sockaddr_in6);
434 /* do not complain about return nothing */
439 * Map an IPv4 address to the internal address format.
442 map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip)
445 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
446 memcpy ((char *)ip + 12, ipv4 , 4);
447 memcpy ((char *)ip + 0, ipv64, 12);
449 if ((char *)ip != (char *)ipv4)
450 memcpy (ip, ipv4, 4);
454 /* Detect whether an IP address represents an IPv4 address and, if so,
455 copy it to IPV4. 0 is returned on failure.
456 This operation always succeeds when Wget is compiled without IPv6.
457 If IPV4 is NULL, don't copy, just detect. */
460 map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4)
463 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
464 if (0 != memcmp (ip, ipv64, 12))
467 memcpy (ipv4, (char *)ip + 12, 4);
470 memcpy (ipv4, (char *)ip, 4);
475 /* Versions of gethostbyname and getaddrinfo that support timeout. */
479 struct ghbnwt_context {
480 const char *host_name;
481 struct hostent *hptr;
485 gethostbyname_with_timeout_callback (void *arg)
487 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
488 ctx->hptr = gethostbyname (ctx->host_name);
491 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
492 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
493 The function makes sure that when NULL is returned for reasons
494 other than timeout, errno is reset. */
496 static struct hostent *
497 gethostbyname_with_timeout (const char *host_name, double timeout)
499 struct ghbnwt_context ctx;
500 ctx.host_name = host_name;
501 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
503 SET_H_ERRNO (HOST_NOT_FOUND);
512 #else /* ENABLE_IPV6 */
514 struct gaiwt_context {
517 const struct addrinfo *hints;
518 struct addrinfo **res;
523 getaddrinfo_with_timeout_callback (void *arg)
525 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
526 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
529 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
530 In case of timeout, the EAI_SYSTEM error code is returned and errno
531 is set to ETIMEDOUT. */
534 getaddrinfo_with_timeout (const char *node, const char *service,
535 const struct addrinfo *hints, struct addrinfo **res,
538 struct gaiwt_context ctx;
540 ctx.service = service;
544 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
549 return ctx.exit_code;
552 #endif /* ENABLE_IPV6 */
554 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
555 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
559 pretty_print_address (ip_address *addr)
563 static char buf[128];
565 if (map_ip_to_ipv4 (addr, &addr4))
566 return inet_ntoa (*(struct in_addr *)&addr4);
568 if (!inet_ntop (AF_INET6, addr, buf, sizeof (buf)))
572 return inet_ntoa (*(struct in_addr *)addr);
575 /* Add host name HOST with the address ADDR_TEXT to the cache.
576 ADDR_LIST is a NULL-terminated list of addresses, as in struct
580 cache_host_lookup (const char *host, struct address_list *al)
582 if (!host_name_addresses_map)
583 host_name_addresses_map = make_nocase_string_hash_table (0);
586 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
592 debug_logprintf ("Caching %s =>", host);
593 for (i = 0; i < al->count; i++)
594 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
595 debug_logprintf ("\n");
600 struct address_list *
601 lookup_host (const char *host, int silent)
603 struct address_list *al = NULL;
604 unsigned long addr_ipv4; /* #### use a 32-bit type here. */
607 /* First, try to check whether the address is already a numeric
611 if (inet_pton (AF_INET6, host, &addr) > 0)
612 return address_list_from_single (&addr);
615 addr_ipv4 = (unsigned long)inet_addr (host);
616 if ((int)addr_ipv4 != -1)
618 /* ADDR is defined to be in network byte order, which is what
619 this returns, so we can just copy it to STORE_IP. However,
620 on big endian 64-bit architectures the value will be stored
621 in the *last*, not first four bytes. OFFSET makes sure that
622 we copy the correct four bytes. */
624 #ifdef WORDS_BIGENDIAN
625 offset = sizeof (unsigned long) - sizeof (ip4_address);
627 map_ipv4_to_ip ((ip4_address *)((char *)&addr_ipv4 + offset), &addr);
628 return address_list_from_single (&addr);
631 if (host_name_addresses_map)
633 al = hash_table_get (host_name_addresses_map, host);
637 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
644 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
646 /* Host name lookup goes on below. */
648 #ifdef HAVE_GETADDRINFO
650 struct addrinfo hints, *ai;
653 memset (&hints, 0, sizeof (hints));
654 if (ip_default_family == AF_INET)
655 hints.ai_family = AF_INET;
657 hints.ai_family = PF_UNSPEC;
658 hints.ai_socktype = SOCK_STREAM;
659 err = getaddrinfo_with_timeout (host, NULL, &hints, &ai, opt.dns_timeout);
661 if (err != 0 || ai == NULL)
664 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
665 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
668 al = address_list_from_addrinfo (ai);
673 struct hostent *hptr;
674 hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
679 if (errno != ETIMEDOUT)
680 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
682 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
686 /* Do all systems have h_addr_list, or is it a newer thing? If
687 the latter, use address_list_from_single. */
688 al = address_list_from_vector (hptr->h_addr_list);
692 /* Print the addresses determined by DNS lookup, but no more than
697 int printmax = al->count <= 3 ? al->count : 3;
698 for (i = 0; i < printmax; i++)
700 logprintf (LOG_VERBOSE, "%s",
701 pretty_print_address (al->addresses + i));
702 if (i < printmax - 1)
703 logputs (LOG_VERBOSE, ", ");
705 if (printmax != al->count)
706 logputs (LOG_VERBOSE, ", ...");
707 logputs (LOG_VERBOSE, "\n");
710 /* Cache the lookup information. */
712 cache_host_lookup (host, al);
717 /* Determine whether a URL is acceptable to be followed, according to
718 a list of domains to accept. */
720 accept_domain (struct url *u)
722 assert (u->host != NULL);
725 if (!sufmatch ((const char **)opt.domains, u->host))
728 if (opt.exclude_domains)
730 if (sufmatch ((const char **)opt.exclude_domains, u->host))
736 /* Check whether WHAT is matched in LIST, each element of LIST being a
737 pattern to match WHAT against, using backward matching (see
738 match_backwards() in utils.c).
740 If an element of LIST matched, 1 is returned, 0 otherwise. */
742 sufmatch (const char **list, const char *what)
747 for (i = 0; list[i]; i++)
749 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
750 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
752 /* The domain must be the first to reach the beginning. */
759 /* Print error messages for host errors. */
763 /* Can't use switch since some constants are equal (at least on my
764 system), and the compiler signals "duplicate case value". */
765 if (error == HOST_NOT_FOUND
766 || error == NO_RECOVERY
768 || error == NO_ADDRESS
769 || error == TRY_AGAIN)
770 return _("Host not found");
772 return _("Unknown error");
776 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
778 struct address_list *al;
780 xfree (key); /* host */
782 al = (struct address_list *)value;
783 assert (al->refcount == 1);
784 address_list_delete (al);
792 if (host_name_addresses_map)
794 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
795 hash_table_destroy (host_name_addresses_map);
796 host_name_addresses_map = NULL;