1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
34 #include <sys/types.h>
38 # define SET_H_ERRNO(err) WSASetLastError(err)
40 # include <sys/socket.h>
41 # include <netinet/in.h>
43 # include <arpa/inet.h>
46 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
50 #define NO_ADDRESS NO_DATA
53 #ifdef HAVE_SYS_UTSNAME_H
54 # include <sys/utsname.h>
75 int ip_default_family = AF_INET6;
77 int ip_default_family = AF_INET;
80 /* Mapping between known hosts and lists of their addresses. */
82 static struct hash_table *host_name_addresses_map;
84 /* Lists of addresses. This should eventually be extended to handle
88 int count; /* number of addresses */
89 ip_address *addresses; /* pointer to the string of addresses */
91 int faulty; /* number of addresses known not to work. */
92 int refcount; /* so we know whether to free it or not. */
95 /* Get the bounds of the address list. */
98 address_list_get_bounds (struct address_list *al, int *start, int *end)
104 /* Copy address number INDEX to IP_STORE. */
107 address_list_copy_one (struct address_list *al, int index, ip_address *ip_store)
109 assert (index >= al->faulty && index < al->count);
110 memcpy (ip_store, al->addresses + index, sizeof (ip_address));
113 /* Check whether two address lists have all their IPs in common. */
116 address_list_match_all (struct address_list *al1, struct address_list *al2)
120 if (al1->count != al2->count)
122 return 0 == memcmp (al1->addresses, al2->addresses,
123 al1->count * sizeof (ip_address));
126 /* Mark the INDEXth element of AL as faulty, so that the next time
127 this address list is used, the faulty element will be skipped. */
130 address_list_set_faulty (struct address_list *al, int index)
132 /* We assume that the address list is traversed in order, so that a
133 "faulty" attempt is always preceded with all-faulty addresses,
134 and this is how Wget uses it. */
135 assert (index == al->faulty);
138 if (al->faulty >= al->count)
139 /* All addresses have been proven faulty. Since there's not much
140 sense in returning the user an empty address list the next
141 time, we'll rather make them all clean, so that they can be
148 * address_list_from_addrinfo
150 * This function transforms an addrinfo linked list into an address_list.
153 * addrinfo* Linked list of addrinfo
156 * address_list* Newly allocated address_list
158 static struct address_list *
159 address_list_from_addrinfo (struct addrinfo *ai)
161 struct address_list *al;
162 struct addrinfo *ai_head = ai;
166 for (ai = ai_head; ai; ai = ai->ai_next)
167 if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6)
172 al = xmalloc (sizeof (struct address_list));
173 al->addresses = xmalloc (cnt * sizeof (ip_address));
178 for (i = 0, ai = ai_head; ai; ai = ai->ai_next)
179 if (ai->ai_family == AF_INET6)
181 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ai->ai_addr;
182 memcpy (al->addresses + i, &sin6->sin6_addr, 16);
185 else if (ai->ai_family == AF_INET)
187 struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr;
188 map_ipv4_to_ip ((ip4_address *)&sin->sin_addr, al->addresses + i);
195 /* Create an address_list out of a NULL-terminated list of addresses,
196 as returned by gethostbyname. */
197 static struct address_list *
198 address_list_new (char **h_addr_list)
202 struct address_list *al = xmalloc (sizeof (struct address_list));
204 while (h_addr_list[count])
209 al->addresses = xmalloc (count * sizeof (ip_address));
212 for (i = 0; i < count; i++)
213 map_ipv4_to_ip ((ip4_address *)h_addr_list[i], al->addresses + i);
219 /* Like address_list_new, but initialized with only one address. */
221 static struct address_list *
222 address_list_new_one (ip_address *addr)
224 struct address_list *al = xmalloc (sizeof (struct address_list));
227 al->addresses = xmalloc (sizeof (ip_address));
229 memcpy (al->addresses, addr, sizeof (ip_address));
235 address_list_delete (struct address_list *al)
237 xfree (al->addresses);
242 address_list_release (struct address_list *al)
245 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
246 if (al->refcount <= 0)
248 DEBUGP (("Deleting unused %p.\n", al));
249 address_list_delete (al);
254 * wget_sockaddr_set_address
256 * This function takes a wget_sockaddr and fills in the protocol type,
257 * the port number and the address, where NULL in address means wildcard.
258 * An unsupported address family will abort the whole program.
261 * wget_sockaddr* The space to be filled
262 * int The desired protocol
263 * unsigned short The port
264 * const ip_address The binary IP address
267 * - Only modifies the first parameter
270 wget_sockaddr_set_address (wget_sockaddr *sa,
271 int ip_family, unsigned short port, ip_address *addr)
273 if (ip_family == AF_INET)
275 sa->sin.sin_family = ip_family;
276 sa->sin.sin_port = htons (port);
278 memset (&sa->sin.sin_addr, 0, sizeof(ip4_address));
282 if (!map_ip_to_ipv4 (addr, &addr4))
283 /* should the callers have prevented this? */
285 memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address));
290 if (ip_family == AF_INET6)
292 sa->sin6.sin6_family = ip_family;
293 sa->sin6.sin6_port = htons (port);
295 memset (&sa->sin6.sin6_addr, 0 , 16);
297 memcpy (&sa->sin6.sin6_addr, addr, 16);
305 * wget_sockaddr_set_port
307 * This function only fills in the port of the socket information.
308 * If the protocol is not supported nothing is done.
309 * An unsupported address family will abort the whole program.
312 * that the IP-Protocol already is set.
315 * wget_sockaddr* The space where the port should be entered
316 * unsigned short The port that should be entered in host order
319 * - Only modifies the first parameter
322 wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port)
324 if (sa->sa.sa_family == AF_INET)
326 sa->sin.sin_port = htons (port);
330 if (sa->sa.sa_family == AF_INET6)
332 sa->sin6.sin6_port = htons (port);
340 * wget_sockaddr_get_addr
342 * This function returns the address from a sockaddr as a byte string.
343 * An unsupported address family will abort the whole program.
346 * that the IP-Protocol already is set.
349 * wget_sockaddr* Socket Information
352 * unsigned char * IP address as byte string.
355 wget_sockaddr_get_addr (wget_sockaddr *sa)
357 if (sa->sa.sa_family == AF_INET)
358 return &sa->sin.sin_addr;
360 if (sa->sa.sa_family == AF_INET6)
361 return &sa->sin6.sin6_addr;
369 * wget_sockaddr_get_port
371 * This function only returns the port from the input structure.
372 * An unsupported address family will abort the whole program.
375 * that the IP-Protocol already is set.
378 * wget_sockaddr* Information where to get the port
381 * unsigned short Port Number in host order.
384 wget_sockaddr_get_port (const wget_sockaddr *sa)
386 if (sa->sa.sa_family == AF_INET)
387 return htons (sa->sin.sin_port);
389 if (sa->sa.sa_family == AF_INET6)
390 return htons (sa->sin6.sin6_port);
393 /* do not complain about return nothing */
400 * This function returns the length of the sockaddr corresponding to
401 * the currently preferred protocol (for bind, connect, etc.).
402 * An unsupported address family will abort the whole program.
405 * that the IP-Protocol already is set.
408 * - Public IP-Family Information
411 * int structure length for socket options
416 if (ip_default_family == AF_INET)
417 return sizeof (struct sockaddr_in);
419 if (ip_default_family == AF_INET6)
420 return sizeof (struct sockaddr_in6);
423 /* do not complain about return nothing */
428 * Map an IPv4 address to the internal address format.
431 map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip)
434 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
435 memcpy ((char *)ip + 12, ipv4 , 4);
436 memcpy ((char *)ip + 0, ipv64, 12);
438 if ((char *)ip != (char *)ipv4)
439 memcpy (ip, ipv4, 4);
443 /* Detect whether an IP address represents an IPv4 address and, if so,
444 copy it to IPV4. 0 is returned on failure.
445 This operation always succeeds when Wget is compiled without IPv6.
446 If IPV4 is NULL, don't copy, just detect. */
449 map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4)
452 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
453 if (0 != memcmp (ip, ipv64, 12))
456 memcpy (ipv4, (char *)ip + 12, 4);
459 memcpy (ipv4, (char *)ip, 4);
464 /* Versions of gethostbyname and getaddrinfo that support timeout. */
468 struct ghbnwt_context {
469 const char *host_name;
470 struct hostent *hptr;
474 gethostbyname_with_timeout_callback (void *arg)
476 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
477 ctx->hptr = gethostbyname (ctx->host_name);
480 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
481 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
482 The function makes sure that when NULL is returned for reasons
483 other than timeout, errno is reset. */
485 static struct hostent *
486 gethostbyname_with_timeout (const char *host_name, int timeout)
488 struct ghbnwt_context ctx;
489 ctx.host_name = host_name;
490 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
492 SET_H_ERRNO (HOST_NOT_FOUND);
503 struct gaiwt_context {
506 const struct addrinfo *hints;
507 struct addrinfo **res;
512 getaddrinfo_with_timeout_callback (void *arg)
514 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
515 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
518 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
519 In case of timeout, the EAI_SYSTEM error code is returned and errno
520 is set to ETIMEDOUT. */
523 getaddrinfo_with_timeout (const char *node, const char *service,
524 const struct addrinfo *hints, struct addrinfo **res,
527 struct gaiwt_context ctx;
529 ctx.service = service;
533 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
538 return ctx.exit_code;
543 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
544 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
548 pretty_print_address (ip_address *addr)
552 static char buf[128];
554 if (map_ip_to_ipv4 (addr, &addr4))
555 return inet_ntoa (*(struct in_addr *)&addr4);
557 if (!inet_ntop (AF_INET6, addr, buf, sizeof (buf)))
561 return inet_ntoa (*(struct in_addr *)addr);
564 /* Add host name HOST with the address ADDR_TEXT to the cache.
565 ADDR_LIST is a NULL-terminated list of addresses, as in struct
569 cache_host_lookup (const char *host, struct address_list *al)
571 if (!host_name_addresses_map)
572 host_name_addresses_map = make_nocase_string_hash_table (0);
575 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
581 debug_logprintf ("Caching %s =>", host);
582 for (i = 0; i < al->count; i++)
583 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
584 debug_logprintf ("\n");
589 struct address_list *
590 lookup_host (const char *host, int silent)
592 struct address_list *al = NULL;
593 unsigned long addr_ipv4; /* #### use a 32-bit type here. */
596 /* First, try to check whether the address is already a numeric
600 if (inet_pton (AF_INET6, host, &addr) > 0)
601 return address_list_new_one (&addr);
604 addr_ipv4 = (unsigned long)inet_addr (host);
605 if ((int)addr_ipv4 != -1)
607 /* ADDR is defined to be in network byte order, which is what
608 this returns, so we can just copy it to STORE_IP. However,
609 on big endian 64-bit architectures the value will be stored
610 in the *last*, not first four bytes. OFFSET makes sure that
611 we copy the correct four bytes. */
613 #ifdef WORDS_BIGENDIAN
614 offset = sizeof (unsigned long) - sizeof (ip4_address);
616 map_ipv4_to_ip ((ip4_address *)((char *)&addr_ipv4 + offset), &addr);
617 return address_list_new_one (&addr);
620 if (host_name_addresses_map)
622 al = hash_table_get (host_name_addresses_map, host);
626 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
633 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
635 /* Host name lookup goes on below. */
639 struct addrinfo hints, *ai;
642 memset (&hints, 0, sizeof (hints));
643 if (ip_default_family == AF_INET)
644 hints.ai_family = AF_INET;
646 hints.ai_family = PF_UNSPEC;
647 hints.ai_socktype = SOCK_STREAM;
648 err = getaddrinfo_with_timeout (host, NULL, &hints, &ai, opt.timeout);
650 if (err != 0 || ai == NULL)
653 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
654 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
657 al = address_list_from_addrinfo (ai);
662 struct hostent *hptr = gethostbyname_with_timeout (host, opt.timeout);
667 if (errno != ETIMEDOUT)
668 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
670 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
674 /* Do all systems have h_addr_list, or is it a newer thing? If
675 the latter, use address_list_new_one. */
676 al = address_list_new (hptr->h_addr_list);
681 logprintf (LOG_VERBOSE, _("done.\n"));
683 /* Cache the lookup information. */
684 cache_host_lookup (host, al);
689 /* Determine whether a URL is acceptable to be followed, according to
690 a list of domains to accept. */
692 accept_domain (struct url *u)
694 assert (u->host != NULL);
697 if (!sufmatch ((const char **)opt.domains, u->host))
700 if (opt.exclude_domains)
702 if (sufmatch ((const char **)opt.exclude_domains, u->host))
708 /* Check whether WHAT is matched in LIST, each element of LIST being a
709 pattern to match WHAT against, using backward matching (see
710 match_backwards() in utils.c).
712 If an element of LIST matched, 1 is returned, 0 otherwise. */
714 sufmatch (const char **list, const char *what)
719 for (i = 0; list[i]; i++)
721 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
722 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
724 /* The domain must be the first to reach its beginning. */
731 /* Print error messages for host errors. */
735 /* Can't use switch since some constants are equal (at least on my
736 system), and the compiler signals "duplicate case value". */
737 if (error == HOST_NOT_FOUND
738 || error == NO_RECOVERY
740 || error == NO_ADDRESS
741 || error == TRY_AGAIN)
742 return _("Host not found");
744 return _("Unknown error");
748 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
750 struct address_list *al;
752 xfree (key); /* host */
754 al = (struct address_list *)value;
755 assert (al->refcount == 1);
756 address_list_delete (al);
764 if (host_name_addresses_map)
766 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
767 hash_table_destroy (host_name_addresses_map);
768 host_name_addresses_map = NULL;