1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 #define NO_ADDRESS NO_DATA
63 #ifdef HAVE_SYS_UTSNAME_H
64 # include <sys/utsname.h>
85 int ip_default_family = AF_UNSPEC;
87 int ip_default_family = AF_INET;
90 /* Mapping between known hosts and lists of their addresses. */
92 static struct hash_table *host_name_addresses_map;
94 /* Lists of addresses. This should eventually be extended to handle
98 int count; /* number of addresses */
99 ip_address *addresses; /* pointer to the array of addresses */
101 int faulty; /* number of addresses known not to work. */
102 int from_cache; /* whether this entry was pulled from
103 cache or freshly looked up. */
105 int refcount; /* reference count; when it drops to
106 0, the entry is freed. */
109 /* Get the bounds of the address list. */
112 address_list_get_bounds (const struct address_list *al, int *start, int *end)
118 /* Return whether this address list entry has been obtained from the
122 address_list_cached_p (const struct address_list *al)
124 return al->from_cache;
127 /* Return a pointer to the address at position POS. */
130 address_list_address_at (const struct address_list *al, int pos)
132 assert (pos >= al->faulty && pos < al->count);
133 return al->addresses + pos;
136 /* Check whether two address lists have all their IPs in common. */
139 address_list_match_all (const struct address_list *al1,
140 const struct address_list *al2)
147 if (al1->count != al2->count)
150 /* For the comparison to be complete, we'd need to sort the IP
151 addresses first. But that's not necessary because this is only
152 used as an optimization. */
155 /* In the non-IPv6 case, there is only one address type, so we can
156 compare the whole array with memcmp. */
157 return 0 == memcmp (al1->addresses, al2->addresses,
158 al1->count * sizeof (ip_address));
159 #else /* ENABLE_IPV6 */
160 for (i = 0; i < al1->count; ++i)
162 const ip_address *ip1 = &al1->addresses[i];
163 const ip_address *ip2 = &al2->addresses[i];
165 if (ip1->type != ip2->type)
171 if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr
172 != ADDRESS_IPV4_IN_ADDR (ip2).s_addr)
176 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
177 if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2))
179 #endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
180 if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1),
181 &ADDRESS_IPV6_IN6_ADDR (ip2)))
189 #endif /* ENABLE_IPV6 */
192 /* Mark the INDEXth element of AL as faulty, so that the next time
193 this address list is used, the faulty element will be skipped. */
196 address_list_set_faulty (struct address_list *al, int index)
198 /* We assume that the address list is traversed in order, so that a
199 "faulty" attempt is always preceded with all-faulty addresses,
200 and this is how Wget uses it. */
201 assert (index == al->faulty);
204 if (al->faulty >= al->count)
205 /* All addresses have been proven faulty. Since there's not much
206 sense in returning the user an empty address list the next
207 time, we'll rather make them all clean, so that they can be
214 * address_list_from_addrinfo
216 * This function transforms an addrinfo linked list into an address_list.
219 * addrinfo* Linked list of addrinfo
222 * address_list* Newly allocated address_list
224 static struct address_list *
225 address_list_from_addrinfo (const struct addrinfo *ai)
227 struct address_list *al;
228 const struct addrinfo *ptr;
233 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
234 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
239 al = xmalloc (sizeof (struct address_list));
240 al->addresses = xmalloc (cnt * sizeof (ip_address));
247 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
248 if (ptr->ai_family == AF_INET6)
250 const struct sockaddr_in6 *sin6 =
251 (const struct sockaddr_in6 *)ptr->ai_addr;
252 ip->type = IPV6_ADDRESS;
253 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
254 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
255 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
259 else if (ptr->ai_family == AF_INET)
261 const struct sockaddr_in *sin =
262 (const struct sockaddr_in *)ptr->ai_addr;
263 ip->type = IPV4_ADDRESS;
264 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
267 assert (ip - al->addresses == cnt);
271 /* Create an address_list out of a NULL-terminated vector of
272 addresses, as returned by gethostbyname. */
273 static struct address_list *
274 address_list_from_vector (char **h_addr_list)
277 struct address_list *al = xmalloc (sizeof (struct address_list));
280 while (h_addr_list[count])
286 al->addresses = xmalloc (count * sizeof (ip_address));
290 for (i = 0; i < count; i++)
292 ip_address *ip = &al->addresses[i];
293 ip->type = IPV4_ADDRESS;
294 memcpy (ADDRESS_IPV4_DATA (ip), h_addr_list[i], 4);
300 /* Like address_list_from_vector, but initialized with a single
303 static struct address_list *
304 address_list_from_single (const ip_address *addr)
306 struct address_list *al = xmalloc (sizeof (struct address_list));
309 al->addresses = xmalloc (sizeof (ip_address));
312 memcpy (al->addresses, addr, sizeof (ip_address));
319 address_list_delete (struct address_list *al)
321 xfree (al->addresses);
326 address_list_release (struct address_list *al)
329 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
330 if (al->refcount <= 0)
332 DEBUGP (("Deleting unused %p.\n", al));
333 address_list_delete (al);
337 /* Versions of gethostbyname and getaddrinfo that support timeout. */
341 struct ghbnwt_context {
342 const char *host_name;
343 struct hostent *hptr;
347 gethostbyname_with_timeout_callback (void *arg)
349 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
350 ctx->hptr = gethostbyname (ctx->host_name);
353 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
354 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
355 The function makes sure that when NULL is returned for reasons
356 other than timeout, errno is reset. */
358 static struct hostent *
359 gethostbyname_with_timeout (const char *host_name, double timeout)
361 struct ghbnwt_context ctx;
362 ctx.host_name = host_name;
363 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
365 SET_H_ERRNO (HOST_NOT_FOUND);
374 #else /* ENABLE_IPV6 */
376 struct gaiwt_context {
379 const struct addrinfo *hints;
380 struct addrinfo **res;
385 getaddrinfo_with_timeout_callback (void *arg)
387 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
388 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
391 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
392 In case of timeout, the EAI_SYSTEM error code is returned and errno
393 is set to ETIMEDOUT. */
396 getaddrinfo_with_timeout (const char *node, const char *service,
397 const struct addrinfo *hints, struct addrinfo **res,
400 struct gaiwt_context ctx;
402 ctx.service = service;
406 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
411 return ctx.exit_code;
414 #endif /* ENABLE_IPV6 */
416 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
417 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
421 pretty_print_address (const ip_address *addr)
426 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
430 static char buf[128];
431 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
433 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
435 /* append "%SCOPE_ID" for all ?non-global? addresses */
436 char *p = buf + strlen (buf);
438 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
442 buf[sizeof (buf) - 1] = '\0';
451 /* Add host name HOST with the address ADDR_TEXT to the cache.
452 ADDR_LIST is a NULL-terminated list of addresses, as in struct
456 cache_host_lookup (const char *host, struct address_list *al)
458 if (!host_name_addresses_map)
459 host_name_addresses_map = make_nocase_string_hash_table (0);
462 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
468 debug_logprintf ("Caching %s =>", host);
469 for (i = 0; i < al->count; i++)
470 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
471 debug_logprintf ("\n");
477 forget_host_lookup (const char *host)
479 struct address_list *al = hash_table_get (host_name_addresses_map, host);
482 address_list_release (al);
483 hash_table_remove (host_name_addresses_map, host);
487 struct address_list *
488 lookup_host (const char *host, int flags)
490 struct address_list *al = NULL;
494 struct addrinfo hints, *res;
496 /* Is this necessary? Should this function be changed to accept a
498 if (flags & LH_IPV4_ONLY)
500 else if (flags & LH_IPV6_ONLY)
503 family = ip_default_family;
506 /* First, try to check whether the address is already a numeric
507 address, in which case we don't need to cache it or bother with
508 setting up timeouts. Plus, if memory serves me right, Ultrix's
509 gethostbyname can't handle numeric addresses (!).
511 Where getaddrinfo is available, we do it using the AI_NUMERICHOST
512 flag. Without IPv6, we check whether inet_addr succeeds. */
515 memset (&hints, 0, sizeof (hints));
516 hints.ai_family = family;
517 hints.ai_socktype = SOCK_STREAM;
518 hints.ai_flags = AI_NUMERICHOST;
519 if (flags & LH_PASSIVE)
520 hints.ai_flags = AI_PASSIVE;
522 /* no need to call getaddrinfo_with_timeout here, as we're not
523 * relying on the DNS, but we're only doing an address translation
524 * from presentation (ASCII) to network format */
525 err = getaddrinfo (host, NULL, &hints, &res);
526 if (err == 0 && res != NULL)
528 al = address_list_from_addrinfo (res);
534 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
535 if (addr_ipv4 != (uint32_t) -1)
537 /* The return value of inet_addr is in network byte order, so
538 we can just copy it to IP. */
540 ip.type = IPV4_ADDRESS;
541 memcpy (ADDRESS_IPV4_DATA (&ip), &addr_ipv4, 4);
542 return address_list_from_single (&ip);
547 /* Then, try to find the host in the cache. */
549 if (host_name_addresses_map)
551 al = hash_table_get (host_name_addresses_map, host);
554 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
561 if (!(flags & LH_SILENT))
562 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
564 /* Host name lookup goes on below. */
568 memset (&hints, 0, sizeof (hints));
569 hints.ai_family = family;
570 hints.ai_socktype = SOCK_STREAM;
571 if (flags & LH_PASSIVE)
572 hints.ai_flags = AI_PASSIVE;
574 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
576 if (err != 0 || res == NULL)
578 if (!(flags & LH_SILENT))
579 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
580 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
583 al = address_list_from_addrinfo (res);
588 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
591 if (!(flags & LH_SILENT))
593 if (errno != ETIMEDOUT)
594 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
596 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
600 assert (hptr->h_length == 4);
601 /* Do all systems have h_addr_list, or is it a newer thing? If
602 the latter, use address_list_from_single. */
603 al = address_list_from_vector (hptr->h_addr_list);
607 /* Print the addresses determined by DNS lookup, but no more than
609 if (!(flags & LH_SILENT))
612 int printmax = al->count <= 3 ? al->count : 3;
613 for (i = 0; i < printmax; i++)
615 logprintf (LOG_VERBOSE, "%s",
616 pretty_print_address (al->addresses + i));
617 if (i < printmax - 1)
618 logputs (LOG_VERBOSE, ", ");
620 if (printmax != al->count)
621 logputs (LOG_VERBOSE, ", ...");
622 logputs (LOG_VERBOSE, "\n");
625 /* Cache the lookup information. */
627 cache_host_lookup (host, al);
632 /* Determine whether a URL is acceptable to be followed, according to
633 a list of domains to accept. */
635 accept_domain (struct url *u)
637 assert (u->host != NULL);
640 if (!sufmatch ((const char **)opt.domains, u->host))
643 if (opt.exclude_domains)
645 if (sufmatch ((const char **)opt.exclude_domains, u->host))
651 /* Check whether WHAT is matched in LIST, each element of LIST being a
652 pattern to match WHAT against, using backward matching (see
653 match_backwards() in utils.c).
655 If an element of LIST matched, 1 is returned, 0 otherwise. */
657 sufmatch (const char **list, const char *what)
662 for (i = 0; list[i]; i++)
664 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
665 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
667 /* The domain must be the first to reach the beginning. */
674 /* Print error messages for host errors. */
678 /* Can't use switch since some constants are equal (at least on my
679 system), and the compiler signals "duplicate case value". */
680 if (error == HOST_NOT_FOUND
681 || error == NO_RECOVERY
683 || error == NO_ADDRESS
684 || error == TRY_AGAIN)
685 return _("Host not found");
687 return _("Unknown error");
691 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
693 struct address_list *al;
695 xfree (key); /* host */
697 al = (struct address_list *)value;
698 assert (al->refcount == 1);
699 address_list_delete (al);
707 if (host_name_addresses_map)
709 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
710 hash_table_destroy (host_name_addresses_map);
711 host_name_addresses_map = NULL;