1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 #define NO_ADDRESS NO_DATA
63 #ifdef HAVE_SYS_UTSNAME_H
64 # include <sys/utsname.h>
85 int ip_default_family = AF_UNSPEC;
87 int ip_default_family = AF_INET;
90 /* Mapping between known host names and the lists of their addresses. */
92 static struct hash_table *host_name_addresses_map;
94 /* Lists of addresses. This should eventually be extended to handle
98 int count; /* number of addresses */
99 ip_address *addresses; /* pointer to the string of addresses */
101 int faulty; /* number of addresses known not to work. */
102 int refcount; /* so we know whether to free it or not. */
105 /* Get the bounds of the address list. */
108 address_list_get_bounds (const struct address_list *al, int *start, int *end)
114 /* Copy address number INDEX to IP_STORE. */
117 address_list_copy_one (const struct address_list *al, int index, ip_address *ip_store)
119 assert (index >= al->faulty && index < al->count);
120 memcpy (ip_store, al->addresses + index, sizeof (ip_address));
123 /* Check whether two address lists have all their IPs in common. */
126 address_list_match_all (const struct address_list *al1, const struct address_list *al2)
131 if (al1->count != al2->count)
133 for (i = 0; i < al1->count; ++i)
136 if (al1->addresses[i].type != al2->addresses[i].type)
138 if (al1->addresses[i].type == IPv6_ADDRESS)
140 const struct in6_addr *addr1 = &al1->addresses[i].addr.ipv6.addr;
141 const struct in6_addr *addr2 = &al2->addresses[i].addr.ipv6.addr;
143 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
144 if ((al1->addresses[i].address.scope_id
145 != al2->addresses[i].address.scope_id)
146 || !IN6_ARE_ADDR_EQUAL (addr1, addr2))
148 if (!IN6_ARE_ADDR_EQUAL (addr1, addr2))
155 const struct in_addr *addr1 = (const struct in_addr *)&al1->addresses[i].addr.ipv4.addr;
156 const struct in_addr *addr2 = (const struct in_addr *)&al2->addresses[i].addr.ipv4.addr;
158 if (addr1->s_addr != addr2->s_addr)
165 /* Mark the INDEXth element of AL as faulty, so that the next time
166 this address list is used, the faulty element will be skipped. */
169 address_list_set_faulty (struct address_list *al, int index)
171 /* We assume that the address list is traversed in order, so that a
172 "faulty" attempt is always preceded with all-faulty addresses,
173 and this is how Wget uses it. */
174 assert (index == al->faulty);
177 if (al->faulty >= al->count)
178 /* All addresses have been proven faulty. Since there's not much
179 sense in returning the user an empty address list the next
180 time, we'll rather make them all clean, so that they can be
187 * address_list_from_addrinfo
189 * This function transforms an addrinfo linked list into an address_list.
192 * addrinfo* Linked list of addrinfo
195 * address_list* Newly allocated address_list
197 static struct address_list *
198 address_list_from_addrinfo (const struct addrinfo *ai)
200 struct address_list *al;
201 const struct addrinfo *ptr;
205 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
206 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
211 al = xmalloc (sizeof (struct address_list));
212 al->addresses = xmalloc (cnt * sizeof (ip_address));
217 for (i = 0, ptr = ai; ptr != NULL; ptr = ptr->ai_next)
218 if (ptr->ai_family == AF_INET6)
220 const struct sockaddr_in6 *sin6 =
221 (const struct sockaddr_in6 *)ptr->ai_addr;
222 al->addresses[i].addr.ipv6.addr = sin6->sin6_addr;
223 al->addresses[i].type = IPv6_ADDRESS;
224 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
225 al->addresses[i].addr.ipv6.scope_id = sin6->sin6_scope_id;
229 else if (ptr->ai_family == AF_INET)
231 const struct sockaddr_in *sin =
232 (const struct sockaddr_in *)ptr->ai_addr;
233 al->addresses[i].addr.ipv4.addr = sin->sin_addr;
234 al->addresses[i].type = IPv4_ADDRESS;
241 /* Create an address_list out of a NULL-terminated vector of
242 addresses, as returned by gethostbyname. */
243 static struct address_list *
244 address_list_from_vector (char **h_addr_list)
248 struct address_list *al = xmalloc (sizeof (struct address_list));
250 while (h_addr_list[count])
255 al->addresses = xmalloc (count * sizeof (ip_address));
258 for (i = 0; i < count; i++) {
259 /* Mauro Tortonesi: is this safe? */
260 memcpy (&((al->addresses + i)->addr.ipv4.addr.s_addr), h_addr_list[i], 4);
261 (al->addresses + i)->type = IPv4_ADDRESS;
267 /* Like address_list_from_vector, but initialized with a single
270 static struct address_list *
271 address_list_from_single (const ip_address *addr)
273 struct address_list *al = xmalloc (sizeof (struct address_list));
276 al->addresses = xmalloc (sizeof (ip_address));
278 memcpy (al->addresses, addr, sizeof (ip_address));
285 address_list_delete (struct address_list *al)
287 xfree (al->addresses);
292 address_list_release (struct address_list *al)
295 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
296 if (al->refcount <= 0)
298 DEBUGP (("Deleting unused %p.\n", al));
299 address_list_delete (al);
304 * sockaddr_set_address
306 * This function takes a sockaddr struct and fills in the protocol type,
307 * the port number and the address. If ENABLE_IPV6 is defined, the sa
308 * parameter should point to a sockaddr_storage structure; if not, it
309 * should point to a sockaddr_in structure.
310 * If the address parameter is NULL, the function will use the unspecified
311 * address (0.0.0.0 for IPv4 and :: for IPv6).
312 * Unsupported address family will abort the whole program.
315 * struct sockaddr* The space to be filled
316 * unsigned short The port
317 * const ip_address The IP address
320 * - Only modifies 1st parameter.
323 sockaddr_set_address (struct sockaddr *sa, unsigned short port,
324 const ip_address *addr)
326 if (addr->type == IPv4_ADDRESS)
328 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
330 sin->sin_family = AF_INET;
331 sin->sin_port = htons (port);
333 sin->sin_addr.s_addr = INADDR_ANY;
335 sin->sin_addr = addr->addr.ipv4.addr;
338 else if (addr->type == IPv6_ADDRESS)
340 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
342 sin6->sin6_family = AF_INET6;
343 sin6->sin6_port = htons (port);
345 sin6->sin6_addr = in6addr_any;
347 sin6->sin6_addr = addr->addr.ipv6.addr;
348 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
349 sin6->sin6_scope_id = addr->addr.ipv6.scope_id;
350 #endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
352 #endif /* ENABLE_IPV6 */
358 sockaddr_get_address (const struct sockaddr *sa, unsigned short *port,
361 if (sa->sa_family == AF_INET)
363 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
365 addr->type = IPv4_ADDRESS;
366 addr->addr.ipv4.addr = sin->sin_addr;
368 *port = ntohs (sin->sin_port);
371 else if (sa->sa_family == AF_INET6)
373 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
375 addr->type = IPv6_ADDRESS;
376 addr->addr.ipv6.addr = sin6->sin6_addr;
377 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
378 addr->addr.ipv6.scope_id = sin6->sin6_scope_id;
381 *port = ntohs (sin6->sin6_port);
388 #if 0 /* currently unused */
392 * This function only fills in the port of the socket information.
393 * If the protocol is not supported nothing is done.
394 * Unsupported address family will abort the whole program.
397 * that the IP-Protocol already is set.
400 * wget_sockaddr* The space where the port should be entered
401 * unsigned int The port that should be entered in host order
404 * - Only modifies 1st parameter.
407 sockaddr_set_port (struct sockaddr *sa, unsigned short port)
409 if (sa->sa_family == AF_INET)
411 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
412 sin->sin_port = htons (port);
415 else if (sa->sa_family == AF_INET6)
417 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
418 sin6->sin6_port = htons (port);
429 * This function only returns the port from the input structure.
430 * Unsupported address family will abort the whole program.
433 * that the IP-Protocol already is set.
436 * wget_sockaddr* Information where to get the port
439 * unsigned short Port Number in host order.
442 sockaddr_get_port (const struct sockaddr *sa)
444 if (sa->sa_family == AF_INET) {
445 const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
446 return htons (sin->sin_port);
448 } else if (sa->sa_family == AF_INET6) {
449 const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;
450 return htons (sin6->sin6_port);
454 /* do not complain about return nothing */
461 * This function returns the length of the sockaddr corresponding to
462 * the actual preferred protocol (for bind, connect, etc.).
463 * Unsupported address family will abort the whole program.
466 * that the IP-Protocol already is set.
469 * - Public IP-Family Information
472 * int structure length for socket options
475 sockaddr_len (const struct sockaddr *sa)
477 if (sa->sa_family == AF_INET)
479 return sizeof (struct sockaddr_in);
482 else if (sa->sa_family == AF_INET6)
484 return sizeof (struct sockaddr_in6);
489 /* do not complain about return nothing */
493 /* Versions of gethostbyname and getaddrinfo that support timeout. */
497 struct ghbnwt_context {
498 const char *host_name;
499 struct hostent *hptr;
503 gethostbyname_with_timeout_callback (void *arg)
505 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
506 ctx->hptr = gethostbyname (ctx->host_name);
509 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
510 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
511 The function makes sure that when NULL is returned for reasons
512 other than timeout, errno is reset. */
514 static struct hostent *
515 gethostbyname_with_timeout (const char *host_name, double timeout)
517 struct ghbnwt_context ctx;
518 ctx.host_name = host_name;
519 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
521 SET_H_ERRNO (HOST_NOT_FOUND);
530 #else /* ENABLE_IPV6 */
532 struct gaiwt_context {
535 const struct addrinfo *hints;
536 struct addrinfo **res;
541 getaddrinfo_with_timeout_callback (void *arg)
543 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
544 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
547 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
548 In case of timeout, the EAI_SYSTEM error code is returned and errno
549 is set to ETIMEDOUT. */
552 getaddrinfo_with_timeout (const char *node, const char *service,
553 const struct addrinfo *hints, struct addrinfo **res,
556 struct gaiwt_context ctx;
558 ctx.service = service;
562 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
567 return ctx.exit_code;
570 #endif /* ENABLE_IPV6 */
572 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
573 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
577 pretty_print_address (const ip_address *addr)
582 return inet_ntoa (addr->addr.ipv4.addr);
587 static char buf[128];
588 inet_ntop (AF_INET6, &addr->addr.ipv6.addr, buf, sizeof (buf));
590 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
591 /* print also scope_id for all ?non-global? addresses */
592 snprintf (buf + len, sizeof (buf) - len, "%%%d", addr->addr.ipv6.scope_id);
596 buf[sizeof (buf) - 1] = '\0';
605 /* Add host name HOST with the address ADDR_TEXT to the cache.
606 ADDR_LIST is a NULL-terminated list of addresses, as in struct
610 cache_host_lookup (const char *host, struct address_list *al)
612 if (!host_name_addresses_map)
613 host_name_addresses_map = make_nocase_string_hash_table (0);
616 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
622 debug_logprintf ("Caching %s =>", host);
623 for (i = 0; i < al->count; i++)
624 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
625 debug_logprintf ("\n");
630 struct address_list *
631 lookup_host (const char *host, int flags)
633 struct address_list *al = NULL;
637 struct addrinfo hints, *res;
639 /* This ip_default_family+flags business looks like bad design to
640 me. This function should rather accept a FAMILY argument. */
641 if (flags & LH_IPv4_ONLY)
643 else if (flags & LH_IPv6_ONLY)
646 family = ip_default_family;
649 /* First, try to check whether the address is already a numeric
650 address. Where getaddrinfo is available, we do it using the
651 AI_NUMERICHOST flag. Without IPv6, we check whether inet_addr
655 memset (&hints, 0, sizeof (hints));
656 hints.ai_family = family;
657 hints.ai_socktype = SOCK_STREAM;
658 hints.ai_flags = AI_NUMERICHOST;
659 if (flags & LH_PASSIVE)
660 hints.ai_flags = AI_PASSIVE;
662 /* no need to call getaddrinfo_with_timeout here, as we're not
663 * relying on the DNS, but we're only doing an address translation
664 * from presentation (ASCII) to network format */
665 err = getaddrinfo (host, NULL, &hints, &res);
666 if (err == 0 && res != NULL)
668 al = address_list_from_addrinfo (res);
674 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
675 if (addr_ipv4 != (uint32_t) -1)
677 /* The return value of inet_addr is in network byte order, so
678 we can just copy it to ADDR. */
680 /* This has a huge number of dereferences because C doesn't
681 support anonymous unions and because struct in_addr adds a
683 addr.addr.ipv4.addr.s_addr = addr_ipv4;
684 addr.type = IPv4_ADDRESS;
685 return address_list_from_single (&addr);
690 /* Then, try to find the host in the cache. */
692 if (host_name_addresses_map)
694 al = hash_table_get (host_name_addresses_map, host);
697 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
703 if (!(flags & LH_SILENT))
704 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
706 /* Host name lookup goes on below. */
710 memset (&hints, 0, sizeof (hints));
711 hints.ai_family = family;
712 hints.ai_socktype = SOCK_STREAM;
713 if (flags & LH_PASSIVE)
714 hints.ai_flags = AI_PASSIVE;
716 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
718 if (err != 0 || res == NULL)
720 if (!(flags & LH_SILENT))
721 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
722 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
725 al = address_list_from_addrinfo (res);
730 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
733 if (!(flags & LH_SILENT))
735 if (errno != ETIMEDOUT)
736 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
738 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
742 assert (hptr->h_length == 4);
743 /* Do all systems have h_addr_list, or is it a newer thing? If
744 the latter, use address_list_from_single. */
745 al = address_list_from_vector (hptr->h_addr_list);
749 /* Print the addresses determined by DNS lookup, but no more than
751 if (!(flags & LH_SILENT))
754 int printmax = al->count <= 3 ? al->count : 3;
755 for (i = 0; i < printmax; i++)
757 logprintf (LOG_VERBOSE, "%s",
758 pretty_print_address (al->addresses + i));
759 if (i < printmax - 1)
760 logputs (LOG_VERBOSE, ", ");
762 if (printmax != al->count)
763 logputs (LOG_VERBOSE, ", ...");
764 logputs (LOG_VERBOSE, "\n");
767 /* Cache the lookup information. */
769 cache_host_lookup (host, al);
774 /* Determine whether a URL is acceptable to be followed, according to
775 a list of domains to accept. */
777 accept_domain (struct url *u)
779 assert (u->host != NULL);
782 if (!sufmatch ((const char **)opt.domains, u->host))
785 if (opt.exclude_domains)
787 if (sufmatch ((const char **)opt.exclude_domains, u->host))
793 /* Check whether WHAT is matched in LIST, each element of LIST being a
794 pattern to match WHAT against, using backward matching (see
795 match_backwards() in utils.c).
797 If an element of LIST matched, 1 is returned, 0 otherwise. */
799 sufmatch (const char **list, const char *what)
804 for (i = 0; list[i]; i++)
806 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
807 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
809 /* The domain must be the first to reach the beginning. */
816 /* Print error messages for host errors. */
820 /* Can't use switch since some constants are equal (at least on my
821 system), and the compiler signals "duplicate case value". */
822 if (error == HOST_NOT_FOUND
823 || error == NO_RECOVERY
825 || error == NO_ADDRESS
826 || error == TRY_AGAIN)
827 return _("Host not found");
829 return _("Unknown error");
833 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
835 struct address_list *al;
837 xfree (key); /* host */
839 al = (struct address_list *)value;
840 assert (al->refcount == 1);
841 address_list_delete (al);
849 if (host_name_addresses_map)
851 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
852 hash_table_destroy (host_name_addresses_map);
853 host_name_addresses_map = NULL;