1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 # define NO_ADDRESS NO_DATA
81 /* Mapping between known hosts and lists of their addresses. */
83 static struct hash_table *host_name_addresses_map;
85 /* Lists of addresses. This should eventually be extended to handle
89 int count; /* number of addresses */
90 ip_address *addresses; /* pointer to the array of addresses */
92 int faulty; /* number of addresses known not to work. */
93 int from_cache; /* whether this entry was pulled from
94 cache or freshly looked up. */
96 int refcount; /* reference count; when it drops to
97 0, the entry is freed. */
100 /* Get the bounds of the address list. */
103 address_list_get_bounds (const struct address_list *al, int *start, int *end)
109 /* Return whether this address list entry has been obtained from the
113 address_list_cached_p (const struct address_list *al)
115 return al->from_cache;
118 /* Return a pointer to the address at position POS. */
121 address_list_address_at (const struct address_list *al, int pos)
123 assert (pos >= al->faulty && pos < al->count);
124 return al->addresses + pos;
127 /* Return 1 if IP is one of the addresses in AL. */
130 address_list_find (const struct address_list *al, const ip_address *ip)
136 for (i = 0; i < al->count; i++)
138 ip_address *cur = al->addresses + i;
139 if (cur->type == IPV4_ADDRESS
140 && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
142 ADDRESS_IPV4_IN_ADDR (ip).s_addr))
148 for (i = 0; i < al->count; i++)
150 ip_address *cur = al->addresses + i;
151 if (cur->type == IPV6_ADDRESS
152 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
153 && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
155 && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
156 &ADDRESS_IPV6_IN6_ADDR (ip)))
160 #endif /* ENABLE_IPV6 */
167 /* Mark the INDEXth element of AL as faulty, so that the next time
168 this address list is used, the faulty element will be skipped. */
171 address_list_set_faulty (struct address_list *al, int index)
173 /* We assume that the address list is traversed in order, so that a
174 "faulty" attempt is always preceded by all-faulty addresses,
175 and this is how Wget uses it. */
176 assert (index == al->faulty);
179 if (al->faulty >= al->count)
180 /* All addresses have been proven faulty. Since there's not much
181 sense in returning the user an empty address list the next
182 time, we'll rather make them all clean, so that they can be
189 * address_list_from_addrinfo
191 * This function transforms an addrinfo linked list into an address_list.
194 * addrinfo* Linked list of addrinfo
197 * address_list* Newly allocated address_list
199 static struct address_list *
200 address_list_from_addrinfo (const struct addrinfo *ai)
202 struct address_list *al;
203 const struct addrinfo *ptr;
208 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
209 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
214 al = xnew0 (struct address_list);
215 al->addresses = xnew_array (ip_address, cnt);
220 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
221 if (ptr->ai_family == AF_INET6)
223 const struct sockaddr_in6 *sin6 =
224 (const struct sockaddr_in6 *)ptr->ai_addr;
225 ip->type = IPV6_ADDRESS;
226 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
227 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
228 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
232 else if (ptr->ai_family == AF_INET)
234 const struct sockaddr_in *sin =
235 (const struct sockaddr_in *)ptr->ai_addr;
236 ip->type = IPV4_ADDRESS;
237 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
240 assert (ip - al->addresses == cnt);
244 /* Create an address_list from a NULL-terminated vector of IPv4
245 addresses. This kind of vector is returned by gethostbyname. */
247 static struct address_list *
248 address_list_from_ipv4_addresses (char **vec)
251 struct address_list *al = xnew0 (struct address_list);
258 al->addresses = xnew_array (ip_address, count);
262 for (i = 0; i < count; i++)
264 ip_address *ip = &al->addresses[i];
265 ip->type = IPV4_ADDRESS;
266 memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
274 address_list_delete (struct address_list *al)
276 xfree (al->addresses);
281 address_list_release (struct address_list *al)
284 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
285 if (al->refcount <= 0)
287 DEBUGP (("Deleting unused %p.\n", al));
288 address_list_delete (al);
292 /* Versions of gethostbyname and getaddrinfo that support timeout. */
296 struct ghbnwt_context {
297 const char *host_name;
298 struct hostent *hptr;
302 gethostbyname_with_timeout_callback (void *arg)
304 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
305 ctx->hptr = gethostbyname (ctx->host_name);
308 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
309 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
310 The function makes sure that when NULL is returned for reasons
311 other than timeout, errno is reset. */
313 static struct hostent *
314 gethostbyname_with_timeout (const char *host_name, double timeout)
316 struct ghbnwt_context ctx;
317 ctx.host_name = host_name;
318 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
320 SET_H_ERRNO (HOST_NOT_FOUND);
329 #else /* ENABLE_IPV6 */
331 struct gaiwt_context {
334 const struct addrinfo *hints;
335 struct addrinfo **res;
340 getaddrinfo_with_timeout_callback (void *arg)
342 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
343 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
346 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
347 In case of timeout, the EAI_SYSTEM error code is returned and errno
348 is set to ETIMEDOUT. */
351 getaddrinfo_with_timeout (const char *node, const char *service,
352 const struct addrinfo *hints, struct addrinfo **res,
355 struct gaiwt_context ctx;
357 ctx.service = service;
361 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
366 return ctx.exit_code;
369 #endif /* ENABLE_IPV6 */
371 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
372 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
376 pretty_print_address (const ip_address *addr)
381 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
385 static char buf[128];
386 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
388 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
390 /* append "%SCOPE_ID" for all ?non-global? addresses */
391 char *p = buf + strlen (buf);
393 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
397 buf[sizeof (buf) - 1] = '\0';
406 /* Add host name HOST with the address ADDR_TEXT to the cache.
407 ADDR_LIST is a NULL-terminated list of addresses, as in struct
411 cache_host_lookup (const char *host, struct address_list *al)
413 if (!host_name_addresses_map)
414 host_name_addresses_map = make_nocase_string_hash_table (0);
417 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
423 debug_logprintf ("Caching %s =>", host);
424 for (i = 0; i < al->count; i++)
425 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
426 debug_logprintf ("\n");
431 /* Remove HOST from Wget's DNS cache. Does nothing if HOST is not in
435 forget_host_lookup (const char *host)
437 struct address_list *al = hash_table_get (host_name_addresses_map, host);
440 address_list_release (al);
441 hash_table_remove (host_name_addresses_map, host);
445 /* Look up HOST in DNS and return a list of IP addresses.
447 This function caches its result so that, if the same host is passed
448 the second time, the addresses are returned without the DNS lookup.
449 If you want to force lookup, call forget_host_lookup() prior to
450 this function, or set opt.dns_cache to 0 to globally disable
453 FLAGS can be a combination of:
454 LH_SILENT - don't print the "resolving ... done" message.
455 LH_IPV4_ONLY - return only IPv4 addresses.
456 LH_IPV6_ONLY - return only IPv6 addresses. */
458 struct address_list *
459 lookup_host (const char *host, int flags)
461 struct address_list *al = NULL;
465 struct addrinfo hints, *res;
468 hints.ai_socktype = SOCK_STREAM;
470 /* Should we inspect opt.<something> directly? */
471 if (flags & LH_IPV4_ONLY)
472 hints.ai_family = AF_INET;
473 else if (flags & LH_IPV6_ONLY)
474 hints.ai_family = AF_INET6;
476 hints.ai_family = AF_UNSPEC;
479 /* First, try to check whether the address is already a numeric
480 address, in which case we don't want to cache it or bother with
481 setting up timeouts. Plus, old (e.g. Ultrix) implementations of
482 gethostbyname can't handle numeric addresses (!).
484 Where getaddrinfo is available, we do it using the AI_NUMERICHOST
485 flag. Without IPv6, we use inet_addr. */
488 hints.ai_flags = AI_NUMERICHOST;
489 if (flags & LH_PASSIVE)
490 hints.ai_flags |= AI_PASSIVE;
492 /* No need to specify timeout, as we're not resolving HOST, but
493 merely translating it from the presentation (ASCII) to network
495 err = getaddrinfo (host, NULL, &hints, &res);
496 if (err == 0 && res != NULL)
498 al = address_list_from_addrinfo (res);
504 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
505 if (addr_ipv4 != (uint32_t) -1)
507 /* The return value of inet_addr is in network byte order, so
508 we can just copy it to IP. */
510 vec[0] = (char *)&addr_ipv4;
512 return address_list_from_ipv4_addresses (vec);
517 /* Then, try to find the host in the cache. */
519 if (host_name_addresses_map)
521 al = hash_table_get (host_name_addresses_map, host);
524 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
531 if (!(flags & LH_SILENT))
532 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
534 /* Host name lookup goes on below. */
538 if (flags & LH_PASSIVE)
539 hints.ai_flags |= AI_PASSIVE;
541 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
542 if (err != 0 || res == NULL)
544 if (!(flags & LH_SILENT))
545 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
546 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
549 al = address_list_from_addrinfo (res);
553 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
556 if (!(flags & LH_SILENT))
558 if (errno != ETIMEDOUT)
559 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
561 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
565 assert (hptr->h_length == 4);
566 /* Do older systems have h_addr_list? */
567 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
571 /* Print the addresses determined by DNS lookup, but no more than
573 if (!(flags & LH_SILENT))
576 int printmax = al->count <= 3 ? al->count : 3;
577 for (i = 0; i < printmax; i++)
579 logprintf (LOG_VERBOSE, "%s",
580 pretty_print_address (al->addresses + i));
581 if (i < printmax - 1)
582 logputs (LOG_VERBOSE, ", ");
584 if (printmax != al->count)
585 logputs (LOG_VERBOSE, ", ...");
586 logputs (LOG_VERBOSE, "\n");
589 /* Cache the lookup information. */
591 cache_host_lookup (host, al);
596 /* Determine whether a URL is acceptable to be followed, according to
597 a list of domains to accept. */
599 accept_domain (struct url *u)
601 assert (u->host != NULL);
604 if (!sufmatch ((const char **)opt.domains, u->host))
607 if (opt.exclude_domains)
609 if (sufmatch ((const char **)opt.exclude_domains, u->host))
615 /* Check whether WHAT is matched in LIST, each element of LIST being a
616 pattern to match WHAT against, using backward matching (see
617 match_backwards() in utils.c).
619 If an element of LIST matched, 1 is returned, 0 otherwise. */
621 sufmatch (const char **list, const char *what)
626 for (i = 0; list[i]; i++)
628 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
629 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
631 /* The domain must be the first to reach the beginning. */
638 /* Print error messages for host errors. */
642 /* Can't use switch since some constants are equal (at least on my
643 system), and the compiler signals "duplicate case value". */
644 if (error == HOST_NOT_FOUND
645 || error == NO_RECOVERY
647 || error == NO_ADDRESS
648 || error == TRY_AGAIN)
649 return _("Host not found");
651 return _("Unknown error");
655 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
657 struct address_list *al;
659 xfree (key); /* host */
661 al = (struct address_list *)value;
662 assert (al->refcount == 1);
663 address_list_delete (al);
671 if (host_name_addresses_map)
673 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
674 hash_table_destroy (host_name_addresses_map);
675 host_name_addresses_map = NULL;