1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
34 #include <sys/types.h>
39 # include <sys/socket.h>
40 # include <netinet/in.h>
42 # include <arpa/inet.h>
48 #define NO_ADDRESS NO_DATA
51 #ifdef HAVE_SYS_UTSNAME_H
52 # include <sys/utsname.h>
73 int ip_default_family = AF_INET6;
75 int ip_default_family = AF_INET;
78 /* Mapping between known hosts and lists of their addresses. */
80 static struct hash_table *host_name_addresses_map;
82 /* Lists of addresses. This should eventually be extended to handle
86 int count; /* number of adrresses */
87 ip_address *addresses; /* pointer to the string of addresses */
89 int faulty; /* number of addresses known not to work. */
90 int refcount; /* so we know whether to free it or not. */
93 /* Get the bounds of the address list. */
96 address_list_get_bounds (struct address_list *al, int *start, int *end)
102 /* Copy address number INDEX to IP_STORE. */
105 address_list_copy_one (struct address_list *al, int index, ip_address *ip_store)
107 assert (index >= al->faulty && index < al->count);
108 memcpy (ip_store, al->addresses + index, sizeof (ip_address));
111 /* Check whether two address lists have all their IPs in common. */
114 address_list_match_all (struct address_list *al1, struct address_list *al2)
118 if (al1->count != al2->count)
120 return 0 == memcmp (al1->addresses, al2->addresses,
121 al1->count * sizeof (ip_address));
124 /* Mark the INDEXth element of AL as faulty, so that the next time
125 this address list is used, the faulty element will be skipped. */
128 address_list_set_faulty (struct address_list *al, int index)
130 /* We assume that the address list is traversed in order, so that a
131 "faulty" attempt is always preceded with all-faulty addresses,
132 and this is how Wget uses it. */
133 assert (index == al->faulty);
136 if (al->faulty >= al->count)
137 /* All addresses have been proven faulty. Since there's not much
138 sense in returning the user an empty address list the next
139 time, we'll rather make them all clean, so that they can be
146 * address_list_from_addrinfo
148 * This function transforms an addrinfo linked list into an address_list.
151 * addrinfo* Linked list of addrinfo
154 * address_list* Newly allocated address_list
156 static struct address_list *
157 address_list_from_addrinfo (struct addrinfo *ai)
159 struct address_list *al;
160 struct addrinfo *ai_head = ai;
164 for (ai = ai_head; ai; ai = ai->ai_next)
165 if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6)
170 al = xmalloc (sizeof (struct address_list));
171 al->addresses = xmalloc (cnt * sizeof (ip_address));
176 for (i = 0, ai = ai_head; ai; ai = ai->ai_next)
177 if (ai->ai_family == AF_INET6)
179 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ai->ai_addr;
180 memcpy (al->addresses + i, &sin6->sin6_addr, 16);
183 else if (ai->ai_family == AF_INET)
185 struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr;
186 map_ipv4_to_ip ((ip4_address *)&sin->sin_addr, al->addresses + i);
193 /* Create an address_list out of a NULL-terminated list of addresses,
194 as returned by gethostbyname. */
195 static struct address_list *
196 address_list_new (char **h_addr_list)
200 struct address_list *al = xmalloc (sizeof (struct address_list));
202 while (h_addr_list[count])
207 al->addresses = xmalloc (count * sizeof (ip_address));
210 for (i = 0; i < count; i++)
211 map_ipv4_to_ip ((ip4_address *)h_addr_list[i], al->addresses + i);
217 /* Like address_list_new, but initialized with only one address. */
219 static struct address_list *
220 address_list_new_one (ip_address *addr)
222 struct address_list *al = xmalloc (sizeof (struct address_list));
225 al->addresses = xmalloc (sizeof (ip_address));
227 memcpy (al->addresses, addr, sizeof (ip_address));
233 address_list_delete (struct address_list *al)
235 xfree (al->addresses);
240 address_list_release (struct address_list *al)
243 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
244 if (al->refcount <= 0)
246 DEBUGP (("Deleting unused %p.\n", al));
247 address_list_delete (al);
252 * wget_sockaddr_set_address
254 * This function takes a wget_sockaddr and fills in the protocol type,
255 * the port number and the address, where NULL in address means wildcard.
256 * An unsupported address family will abort the whole program.
259 * wget_sockaddr* The space to be filled
260 * int The wished protocol
261 * unsigned short The port
262 * const ip_address The binary IP address
265 * - Only modifies the first parameter
268 wget_sockaddr_set_address (wget_sockaddr *sa,
269 int ip_family, unsigned short port, ip_address *addr)
271 if (ip_family == AF_INET)
273 sa->sin.sin_family = ip_family;
274 sa->sin.sin_port = htons (port);
276 memset (&sa->sin.sin_addr, 0, sizeof(ip4_address));
280 if (!map_ip_to_ipv4 (addr, &addr4))
281 /* should the callers have prevented this? */
283 memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address));
288 if (ip_family == AF_INET6)
290 sa->sin6.sin6_family = ip_family;
291 sa->sin6.sin6_port = htons (port);
293 memset (&sa->sin6.sin6_addr, 0 , 16);
295 memcpy (&sa->sin6.sin6_addr, addr, 16);
303 * wget_sockaddr_set_port
305 * This function only fills in the port of the socket information.
306 * If the protocol is not supported nothing is done.
307 * An unsupported address family will abort the whole program.
310 * that the IP-Protocol already is set.
313 * wget_sockaddr* The space where the port should be entered
314 * unsigned short The port that should be entered, in host order
317 * - Only modify 1. param
320 wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port)
322 if (sa->sa.sa_family == AF_INET)
324 sa->sin.sin_port = htons (port);
328 if (sa->sa.sa_family == AF_INET6)
330 sa->sin6.sin6_port = htons (port);
338 * wget_sockaddr_get_addr
340 * This function returns the address from a sockaddr as a byte string.
341 * An unsupported address family will abort the whole program.
344 * that the IP-Protocol already is set.
347 * wget_sockaddr* Socket Information
350 * unsigned char * IP address as byte string.
353 wget_sockaddr_get_addr (wget_sockaddr *sa)
355 if (sa->sa.sa_family == AF_INET)
356 return &sa->sin.sin_addr;
358 if (sa->sa.sa_family == AF_INET6)
359 return &sa->sin6.sin6_addr;
367 * wget_sockaddr_get_port
369 * This function only returns the port from the input structure.
370 * An unsupported address family will abort the whole program.
373 * that the IP-Protocol already is set.
376 * wget_sockaddr* Information where to get the port
379 * unsigned short Port Number in host order.
382 wget_sockaddr_get_port (const wget_sockaddr *sa)
384 if (sa->sa.sa_family == AF_INET)
385 return htons (sa->sin.sin_port);
387 if (sa->sa.sa_family == AF_INET6)
388 return htons (sa->sin6.sin6_port);
391 /* do not complain about return nothing */
398 * This function returns the length of the sockaddr corresponding to
399 * the currently preferred protocol (for bind, connect, etc.).
400 * An unsupported address family will abort the whole program.
403 * that the IP-Protocol already is set.
406 * - Public IP-Family Information
409 * int structure length for socket options
414 if (ip_default_family == AF_INET)
415 return sizeof (struct sockaddr_in);
417 if (ip_default_family == AF_INET6)
418 return sizeof (struct sockaddr_in6);
421 /* do not complain about return nothing */
426 * Map an IPv4 address to the internal address format.
429 map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip)
432 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
433 memcpy ((char *)ip + 12, ipv4 , 4);
434 memcpy ((char *)ip + 0, ipv64, 12);
436 if ((char *)ip != (char *)ipv4)
437 memcpy (ip, ipv4, 4);
441 /* Detect whether an IP address represents an IPv4 address and, if so,
442 copy it to IPV4. 0 is returned on failure.
443 This operation always succeeds when Wget is compiled without IPv6.
444 If IPV4 is NULL, don't copy, just detect. */
447 map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4)
450 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
451 if (0 != memcmp (ip, ipv64, 12))
454 memcpy (ipv4, (char *)ip + 12, 4);
457 memcpy (ipv4, (char *)ip, 4);
462 /* Versions of gethostbyname and getaddrinfo that support timeout. */
466 struct ghbnwt_context {
467 const char *host_name;
468 struct hostent *hptr;
472 gethostbyname_with_timeout_callback (void *arg)
474 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
475 ctx->hptr = gethostbyname (ctx->host_name);
478 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
479 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
480 The function makes sure that when NULL is returned for reasons
481 other than timeout, errno is reset. */
483 static struct hostent *
484 gethostbyname_with_timeout (const char *host_name, int timeout)
486 struct ghbnwt_context ctx;
487 ctx.host_name = host_name;
488 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
490 h_errno = HOST_NOT_FOUND;
501 struct gaiwt_context {
504 const struct addrinfo *hints;
505 struct addrinfo **res;
510 getaddrinfo_with_timeout_callback (void *arg)
512 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
513 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
516 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
517 In case of timeout, the EAI_SYSTEM error code is returned and errno
518 is set to ETIMEDOUT. */
521 getaddrinfo_with_timeout (const char *node, const char *service,
522 const struct addrinfo *hints, struct addrinfo **res,
525 struct gaiwt_context ctx;
527 ctx.service = service;
531 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
536 return ctx.exit_code;
541 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
542 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
546 pretty_print_address (ip_address *addr)
550 static char buf[128];
552 if (map_ip_to_ipv4 (addr, &addr4))
553 return inet_ntoa (*(struct in_addr *)&addr4);
555 if (!inet_ntop (AF_INET6, addr, buf, sizeof (buf)))
559 return inet_ntoa (*(struct in_addr *)addr);
562 /* Add host name HOST with the address ADDR_TEXT to the cache.
563 ADDR_LIST is a NULL-terminated list of addresses, as in struct
567 cache_host_lookup (const char *host, struct address_list *al)
569 if (!host_name_addresses_map)
570 host_name_addresses_map = make_nocase_string_hash_table (0);
573 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
579 debug_logprintf ("Caching %s =>", host);
580 for (i = 0; i < al->count; i++)
581 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
582 debug_logprintf ("\n");
587 struct address_list *
588 lookup_host (const char *host, int silent)
590 struct address_list *al = NULL;
591 unsigned long addr_ipv4; /* #### use a 32-bit type here. */
594 /* First, try to check whether the address is already a numeric
598 if (inet_pton (AF_INET6, host, &addr) > 0)
599 return address_list_new_one (&addr);
602 addr_ipv4 = (unsigned long)inet_addr (host);
603 if ((int)addr_ipv4 != -1)
605 /* ADDR is defined to be in network byte order, which is what
606 this returns, so we can just copy it to STORE_IP. However,
607 on big endian 64-bit architectures the value will be stored
608 in the *last*, not first four bytes. OFFSET makes sure that
609 we copy the correct four bytes. */
611 #ifdef WORDS_BIGENDIAN
612 offset = sizeof (unsigned long) - sizeof (ip4_address);
614 map_ipv4_to_ip ((ip4_address *)((char *)&addr_ipv4 + offset), &addr);
615 return address_list_new_one (&addr);
618 if (host_name_addresses_map)
620 al = hash_table_get (host_name_addresses_map, host);
624 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
631 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
633 /* Host name lookup goes on below. */
637 struct addrinfo hints, *ai;
640 memset (&hints, 0, sizeof (hints));
641 if (ip_default_family == AF_INET)
642 hints.ai_family = AF_INET;
644 hints.ai_family = PF_UNSPEC;
645 hints.ai_socktype = SOCK_STREAM;
646 err = getaddrinfo_with_timeout (host, NULL, &hints, &ai, opt.timeout);
648 if (err != 0 || ai == NULL)
651 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
652 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
655 al = address_list_from_addrinfo (ai);
660 struct hostent *hptr = gethostbyname_with_timeout (host, opt.timeout);
665 if (errno != ETIMEDOUT)
666 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
668 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
672 /* Do all systems have h_addr_list, or is it a newer thing? If
673 the latter, use address_list_new_one. */
674 al = address_list_new (hptr->h_addr_list);
679 logprintf (LOG_VERBOSE, _("done.\n"));
681 /* Cache the lookup information. */
682 cache_host_lookup (host, al);
687 /* Determine whether a URL is acceptable to be followed, according to
688 a list of domains to accept. */
690 accept_domain (struct url *u)
692 assert (u->host != NULL);
695 if (!sufmatch ((const char **)opt.domains, u->host))
698 if (opt.exclude_domains)
700 if (sufmatch ((const char **)opt.exclude_domains, u->host))
706 /* Check whether WHAT is matched in LIST, each element of LIST being a
707 pattern to match WHAT against, using backward matching (see
708 match_backwards() in utils.c).
710 If an element of LIST matched, 1 is returned, 0 otherwise. */
712 sufmatch (const char **list, const char *what)
717 for (i = 0; list[i]; i++)
719 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
720 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
722 /* The domain must be the first to reach the beginning. */
729 /* Print error messages for host errors. */
733 /* Can't use switch since some constants are equal (at least on my
734 system), and the compiler signals "duplicate case value". */
735 if (error == HOST_NOT_FOUND
736 || error == NO_RECOVERY
738 || error == NO_ADDRESS
739 || error == TRY_AGAIN)
740 return _("Host not found");
742 return _("Unknown error");
746 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
748 struct address_list *al;
750 xfree (key); /* host */
752 al = (struct address_list *)value;
753 assert (al->refcount == 1);
754 address_list_delete (al);
762 if (host_name_addresses_map)
764 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
765 hash_table_destroy (host_name_addresses_map);
766 host_name_addresses_map = NULL;