1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 #define NO_ADDRESS NO_DATA
63 #ifdef HAVE_SYS_UTSNAME_H
64 # include <sys/utsname.h>
85 int ip_default_family = AF_UNSPEC;
87 int ip_default_family = AF_INET;
90 /* Mapping from known host names to lists of their addresses. */
92 static struct hash_table *host_name_addresses_map;
94 /* Lists of addresses. This should eventually be extended to handle
98 int count; /* number of addresses */
99 ip_address *addresses; /* pointer to the array of addresses */
101 int faulty; /* number of addresses known not to work. */
102 int from_cache; /* whether this entry was pulled from
103 cache or freshly looked up. */
105 int refcount; /* reference count; when it drops to
106 0, the entry is freed. */
109 /* Get the bounds of the address list. */
112 address_list_get_bounds (const struct address_list *al, int *start, int *end)
118 /* Return whether this address list entry has been obtained from the
122 address_list_cached_p (const struct address_list *al)
124 return al->from_cache;
127 /* Return a pointer to the address at position POS. */
130 address_list_address_at (const struct address_list *al, int pos)
132 assert (pos >= al->faulty && pos < al->count);
133 return al->addresses + pos;
136 /* Check whether two address lists have all their IPs in common. */
139 address_list_match_all (const struct address_list *al1,
140 const struct address_list *al2)
147 if (al1->count != al2->count)
150 /* For the comparison to be complete, we'd need to sort the IP
151 addresses first. But that's not necessary because this is only
152 used as an optimization. */
155 /* In the non-IPv6 case, there is only one address type, so we can
156 compare the whole array with memcmp. */
157 return 0 == memcmp (al1->addresses, al2->addresses,
158 al1->count * sizeof (ip_address));
159 #else /* ENABLE_IPV6 */
160 for (i = 0; i < al1->count; ++i)
162 const ip_address *ip1 = &al1->addresses[i];
163 const ip_address *ip2 = &al2->addresses[i];
165 if (ip1->type != ip2->type)
171 if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr
172 != ADDRESS_IPV4_IN_ADDR (ip2).s_addr)
176 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
177 if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2))
179 #endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
180 if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1),
181 &ADDRESS_IPV6_IN6_ADDR (ip2)))
189 #endif /* ENABLE_IPV6 */
192 /* Mark the INDEXth element of AL as faulty, so that the next time
193 this address list is used, the faulty element will be skipped. */
196 address_list_set_faulty (struct address_list *al, int index)
198 /* We assume that the address list is traversed in order, so that a
199 "faulty" attempt is always preceded by all-faulty addresses,
200 and this is how Wget uses it. */
201 assert (index == al->faulty);
204 if (al->faulty >= al->count)
205 /* All addresses have been proven faulty. Since there's not much
206 sense in returning the user an empty address list the next
207 time, we'll rather make them all clean, so that they can be
214 * address_list_from_addrinfo
216 * This function transforms an addrinfo linked list into an address_list.
219 * addrinfo* Linked list of addrinfo
222 * address_list* Newly allocated address_list
224 static struct address_list *
225 address_list_from_addrinfo (const struct addrinfo *ai)
227 struct address_list *al;
228 const struct addrinfo *ptr;
233 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
234 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
239 al = xmalloc (sizeof (struct address_list));
240 al->addresses = xmalloc (cnt * sizeof (ip_address));
247 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
248 if (ptr->ai_family == AF_INET6)
250 const struct sockaddr_in6 *sin6 =
251 (const struct sockaddr_in6 *)ptr->ai_addr;
252 ip->type = IPV6_ADDRESS;
253 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
254 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
255 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
259 else if (ptr->ai_family == AF_INET)
261 const struct sockaddr_in *sin =
262 (const struct sockaddr_in *)ptr->ai_addr;
263 ip->type = IPV4_ADDRESS;
264 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
267 assert (ip - al->addresses == cnt);
271 /* Create an address_list from a NULL-terminated vector of IPv4
272 addresses. This kind of vector is returned by gethostbyname. */
274 static struct address_list *
275 address_list_from_ipv4_addresses (char **h_addr_list)
278 struct address_list *al = xmalloc (sizeof (struct address_list));
281 while (h_addr_list[count])
287 al->addresses = xmalloc (count * sizeof (ip_address));
291 for (i = 0; i < count; i++)
293 ip_address *ip = &al->addresses[i];
294 ip->type = IPV4_ADDRESS;
295 memcpy (ADDRESS_IPV4_DATA (ip), h_addr_list[i], 4);
303 address_list_delete (struct address_list *al)
305 xfree (al->addresses);
310 address_list_release (struct address_list *al)
313 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
314 if (al->refcount <= 0)
316 DEBUGP (("Deleting unused %p.\n", al));
317 address_list_delete (al);
321 /* Versions of gethostbyname and getaddrinfo that support timeout. */
325 struct ghbnwt_context {
326 const char *host_name;
327 struct hostent *hptr;
331 gethostbyname_with_timeout_callback (void *arg)
333 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
334 ctx->hptr = gethostbyname (ctx->host_name);
337 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
338 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
339 The function makes sure that when NULL is returned for reasons
340 other than timeout, errno is reset. */
342 static struct hostent *
343 gethostbyname_with_timeout (const char *host_name, double timeout)
345 struct ghbnwt_context ctx;
346 ctx.host_name = host_name;
347 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
349 SET_H_ERRNO (HOST_NOT_FOUND);
358 #else /* ENABLE_IPV6 */
360 struct gaiwt_context {
363 const struct addrinfo *hints;
364 struct addrinfo **res;
369 getaddrinfo_with_timeout_callback (void *arg)
371 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
372 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
375 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
376 In case of timeout, the EAI_SYSTEM error code is returned and errno
377 is set to ETIMEDOUT. */
380 getaddrinfo_with_timeout (const char *node, const char *service,
381 const struct addrinfo *hints, struct addrinfo **res,
384 struct gaiwt_context ctx;
386 ctx.service = service;
390 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
395 return ctx.exit_code;
398 #endif /* ENABLE_IPV6 */
400 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
401 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
405 pretty_print_address (const ip_address *addr)
410 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
414 static char buf[128];
415 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
417 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
419 /* append "%SCOPE_ID" for all ?non-global? addresses */
420 char *p = buf + strlen (buf);
422 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
426 buf[sizeof (buf) - 1] = '\0';
435 /* Add host name HOST with the address ADDR_TEXT to the cache.
436 ADDR_LIST is a NULL-terminated list of addresses, as in struct
440 cache_host_lookup (const char *host, struct address_list *al)
442 if (!host_name_addresses_map)
443 host_name_addresses_map = make_nocase_string_hash_table (0);
446 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
452 debug_logprintf ("Caching %s =>", host);
453 for (i = 0; i < al->count; i++)
454 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
455 debug_logprintf ("\n");
461 forget_host_lookup (const char *host)
463 struct address_list *al = hash_table_get (host_name_addresses_map, host);
466 address_list_release (al);
467 hash_table_remove (host_name_addresses_map, host);
471 struct address_list *
472 lookup_host (const char *host, int flags)
474 struct address_list *al = NULL;
478 struct addrinfo hints, *res;
480 /* Is this necessary? Should this function be changed to accept a
482 if (flags & LH_IPV4_ONLY)
484 else if (flags & LH_IPV6_ONLY)
487 family = ip_default_family;
490 /* First, try to check whether the address is already a numeric
491 address, in which case we don't need to cache it or bother with
492 setting up timeouts. Plus, if memory serves me right, Ultrix's
493 gethostbyname can't handle numeric addresses (!).
495 Where getaddrinfo is available, we do it using the AI_NUMERICHOST
496 flag. Without IPv6, we check whether inet_addr succeeds. */
499 memset (&hints, 0, sizeof (hints));
500 hints.ai_family = family;
501 hints.ai_socktype = SOCK_STREAM;
502 hints.ai_flags = AI_NUMERICHOST;
503 if (flags & LH_PASSIVE)
504 hints.ai_flags = AI_PASSIVE; /* NOTE(review): plain assignment discards the AI_NUMERICHOST set just above, so this untimed call may perform a real lookup; "|=" may be intended -- confirm. */
506 /* no need to call getaddrinfo_with_timeout here, as we're not
507 * relying on the DNS, but we're only doing an address translation
508 * from presentation (ASCII) to network format */
509 err = getaddrinfo (host, NULL, &hints, &res);
510 if (err == 0 && res != NULL)
512 al = address_list_from_addrinfo (res);
518 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
519 if (addr_ipv4 != (uint32_t) -1)
521 /* The return value of inet_addr is in network byte order, so
522 we can just copy it to IP. */
524 vec[0] = (char *)&addr_ipv4;
526 return address_list_from_ipv4_addresses (vec);
531 /* Then, try to find the host in the cache. */
533 if (host_name_addresses_map)
535 al = hash_table_get (host_name_addresses_map, host);
538 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
545 if (!(flags & LH_SILENT))
546 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
548 /* Host name lookup goes on below. */
552 memset (&hints, 0, sizeof (hints));
553 hints.ai_family = family;
554 hints.ai_socktype = SOCK_STREAM;
555 if (flags & LH_PASSIVE)
556 hints.ai_flags = AI_PASSIVE;
558 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
560 if (err != 0 || res == NULL)
562 if (!(flags & LH_SILENT))
563 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
564 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
567 al = address_list_from_addrinfo (res);
572 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
575 if (!(flags & LH_SILENT))
577 if (errno != ETIMEDOUT)
578 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
580 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
584 assert (hptr->h_length == 4);
585 /* Do older systems have h_addr_list? */
586 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
590 /* Print the addresses determined by DNS lookup, but no more than
592 if (!(flags & LH_SILENT))
595 int printmax = al->count <= 3 ? al->count : 3;
596 for (i = 0; i < printmax; i++)
598 logprintf (LOG_VERBOSE, "%s",
599 pretty_print_address (al->addresses + i));
600 if (i < printmax - 1)
601 logputs (LOG_VERBOSE, ", ");
603 if (printmax != al->count)
604 logputs (LOG_VERBOSE, ", ...");
605 logputs (LOG_VERBOSE, "\n");
608 /* Cache the lookup information. */
610 cache_host_lookup (host, al);
615 /* Determine whether a URL is acceptable to be followed, according to
616 a list of domains to accept. */
618 accept_domain (struct url *u)
620 assert (u->host != NULL);
623 if (!sufmatch ((const char **)opt.domains, u->host))
626 if (opt.exclude_domains)
628 if (sufmatch ((const char **)opt.exclude_domains, u->host))
634 /* Check whether WHAT is matched in LIST, each element of LIST being a
635 pattern to match WHAT against, using backward matching (see
636 match_backwards() in utils.c).
638 If an element of LIST matched, 1 is returned, 0 otherwise. */
640 sufmatch (const char **list, const char *what)
645 for (i = 0; list[i]; i++)
647 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
648 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
650 /* The domain must be first to reach to beginning. */
657 /* Print error messages for host errors. */
661 /* Can't use switch since some constants are equal (at least on my
662 system), and the compiler signals "duplicate case value". */
663 if (error == HOST_NOT_FOUND
664 || error == NO_RECOVERY
666 || error == NO_ADDRESS
667 || error == TRY_AGAIN)
668 return _("Host not found");
670 return _("Unknown error");
674 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
676 struct address_list *al;
678 xfree (key); /* host */
680 al = (struct address_list *)value;
681 assert (al->refcount == 1);
682 address_list_delete (al);
690 if (host_name_addresses_map)
692 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
693 hash_table_destroy (host_name_addresses_map);
694 host_name_addresses_map = NULL;