1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 # define NO_ADDRESS NO_DATA
81 /* Mapping between known hosts and the lists of their addresses. */
83 static struct hash_table *host_name_addresses_map;
86 /* The default IP family for looking up host names. This should be
87 moved to an entry in struct options when we implement the
88 --inet4/--inet6 flags. */
89 static int ip_default_family = AF_UNSPEC;
92 /* Lists of addresses. This should eventually be extended to handle
96 int count; /* number of addresses */
97 ip_address *addresses; /* pointer to the string of addresses */
99 int faulty; /* number of addresses known not to work. */
100 int from_cache; /* whether this entry was pulled from
101 cache or freshly looked up. */
103 int refcount; /* reference count; when it drops to
104 0, the entry is freed. */
107 /* Get the bounds of the address list. */
110 address_list_get_bounds (const struct address_list *al, int *start, int *end)
116 /* Return whether this address list entry has been obtained from the
120 address_list_cached_p (const struct address_list *al)
122 return al->from_cache;
125 /* Return a pointer to the address at position POS. */
128 address_list_address_at (const struct address_list *al, int pos)
130 assert (pos >= al->faulty && pos < al->count);
131 return al->addresses + pos;
134 /* Return 1 if IP is one of the addresses in AL. */
137 address_list_find (const struct address_list *al, const ip_address *ip)
143 for (i = 0; i < al->count; i++)
145 ip_address *cur = al->addresses + i;
146 if (cur->type == IPV4_ADDRESS
147 && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
149 ADDRESS_IPV4_IN_ADDR (ip).s_addr))
155 for (i = 0; i < al->count; i++)
157 ip_address *cur = al->addresses + i;
158 if (cur->type == IPV6_ADDRESS
159 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
160 && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
162 && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
163 &ADDRESS_IPV6_IN6_ADDR (ip)))
167 #endif /* ENABLE_IPV6 */
174 /* Mark the INDEXth element of AL as faulty, so that the next time
175 this address list is used, the faulty element will be skipped. */
178 address_list_set_faulty (struct address_list *al, int index)
180 /* We assume that the address list is traversed in order, so that a
181 "faulty" attempt is always preceded with all-faulty addresses,
182 and this is how Wget uses it. */
183 assert (index == al->faulty);
186 if (al->faulty >= al->count)
187 /* All addresses have been proven faulty. Since there's not much
188 sense in returning the user an empty address list the next
189 time, we'll rather make them all clean, so that they can be
196 * address_list_from_addrinfo
198 * This function transforms an addrinfo linked list into an address_list.
201 * addrinfo* Linked list of addrinfo
204 * address_list* Newly allocated address_list
206 static struct address_list *
207 address_list_from_addrinfo (const struct addrinfo *ai)
209 struct address_list *al;
210 const struct addrinfo *ptr;
215 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
216 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
221 al = xnew0 (struct address_list);
222 al->addresses = xnew_array (ip_address, cnt);
227 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
228 if (ptr->ai_family == AF_INET6)
230 const struct sockaddr_in6 *sin6 =
231 (const struct sockaddr_in6 *)ptr->ai_addr;
232 ip->type = IPV6_ADDRESS;
233 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
234 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
235 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
239 else if (ptr->ai_family == AF_INET)
241 const struct sockaddr_in *sin =
242 (const struct sockaddr_in *)ptr->ai_addr;
243 ip->type = IPV4_ADDRESS;
244 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
247 assert (ip - al->addresses == cnt);
251 /* Create an address_list from a NULL-terminated vector of IPv4
252 addresses. This kind of vector is returned by gethostbyname. */
254 static struct address_list *
255 address_list_from_ipv4_addresses (char **vec)
258 struct address_list *al = xnew0 (struct address_list);
265 al->addresses = xnew_array (ip_address, count);
269 for (i = 0; i < count; i++)
271 ip_address *ip = &al->addresses[i];
272 ip->type = IPV4_ADDRESS;
273 memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
281 address_list_delete (struct address_list *al)
283 xfree (al->addresses);
288 address_list_release (struct address_list *al)
291 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
292 if (al->refcount <= 0)
294 DEBUGP (("Deleting unused %p.\n", al));
295 address_list_delete (al);
299 /* Versions of gethostbyname and getaddrinfo that support timeout. */
303 struct ghbnwt_context {
304 const char *host_name;
305 struct hostent *hptr;
309 gethostbyname_with_timeout_callback (void *arg)
311 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
312 ctx->hptr = gethostbyname (ctx->host_name);
315 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
316 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
317 The function makes sure that when NULL is returned for reasons
318 other than timeout, errno is reset. */
320 static struct hostent *
321 gethostbyname_with_timeout (const char *host_name, double timeout)
323 struct ghbnwt_context ctx;
324 ctx.host_name = host_name;
325 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
327 SET_H_ERRNO (HOST_NOT_FOUND);
336 /* Print error messages for host errors. */
338 host_errstr (int error)
340 /* Can't use switch since some of these constants can be equal,
341 which makes the compiler complain about duplicate case
343 if (error == HOST_NOT_FOUND
344 || error == NO_RECOVERY
346 || error == NO_ADDRESS)
347 return _("Host not found");
348 else if (error == TRY_AGAIN)
349 /* Message modeled after what gai_strerror returns in similar
351 return _("Temporary failure in name resolution");
353 return _("Unknown error");
356 #else /* ENABLE_IPV6 */
358 struct gaiwt_context {
361 const struct addrinfo *hints;
362 struct addrinfo **res;
367 getaddrinfo_with_timeout_callback (void *arg)
369 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
370 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
373 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
374 In case of timeout, the EAI_SYSTEM error code is returned and errno
375 is set to ETIMEDOUT. */
378 getaddrinfo_with_timeout (const char *node, const char *service,
379 const struct addrinfo *hints, struct addrinfo **res,
382 struct gaiwt_context ctx;
384 ctx.service = service;
388 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
393 return ctx.exit_code;
396 #endif /* ENABLE_IPV6 */
398 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
399 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
403 pretty_print_address (const ip_address *addr)
408 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
412 static char buf[128];
413 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
415 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
417 /* append "%SCOPE_ID" for all ?non-global? addresses */
418 char *p = buf + strlen (buf);
420 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
424 buf[sizeof (buf) - 1] = '\0';
433 /* Add host name HOST with the address ADDR_TEXT to the cache.
434 ADDR_LIST is a NULL-terminated list of addresses, as in struct
438 cache_host_lookup (const char *host, struct address_list *al)
440 if (!host_name_addresses_map)
441 host_name_addresses_map = make_nocase_string_hash_table (0);
444 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
450 debug_logprintf ("Caching %s =>", host);
451 for (i = 0; i < al->count; i++)
452 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
453 debug_logprintf ("\n");
458 /* Remove HOST from Wget's DNS cache. Does nothing if HOST is not in
462 forget_host_lookup (const char *host)
464 struct address_list *al = hash_table_get (host_name_addresses_map, host);
467 address_list_release (al);
468 hash_table_remove (host_name_addresses_map, host);
472 /* Look up HOST in DNS and return a list of IP addresses.
474 This function caches its result so that, if the same host is passed
475 the second time, the addresses are returned without the DNS lookup.
476 If you want to force lookup, call forget_host_lookup() prior to
477 this function, or set opt.dns_cache to 0 to globally disable
480 If SILENT is non-zero, progress messages are not printed. */
482 struct address_list *
483 lookup_host (const char *host, int silent)
485 struct address_list *al = NULL;
489 struct addrinfo hints, *res;
491 hints.ai_socktype = SOCK_STREAM;
492 hints.ai_family = ip_default_family;
495 /* First, try to check whether the address is already a numeric
496 address, in which case we don't want to cache it or bother with
497 setting up timeouts. Plus, old (e.g. Ultrix) implementations of
498 gethostbyname can't handle numeric addresses (!).
500 Where getaddrinfo is available, we do it using the AI_NUMERICHOST
501 flag. Without IPv6, we use inet_addr. */
504 hints.ai_flags = AI_NUMERICHOST;
506 /* No need to specify timeout, as we're not resolving HOST, but
507 merely translating it from the presentation (ASCII) to network
509 err = getaddrinfo (host, NULL, &hints, &res);
510 if (err == 0 && res != NULL)
512 al = address_list_from_addrinfo (res);
518 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
519 if (addr_ipv4 != (uint32_t) -1)
521 /* The return value of inet_addr is in network byte order, so
522 we can just copy it to IP. */
524 vec[0] = (char *)&addr_ipv4;
526 return address_list_from_ipv4_addresses (vec);
531 /* Then, try to find the host in the cache. */
533 if (host_name_addresses_map)
535 al = hash_table_get (host_name_addresses_map, host);
538 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
545 /* No luck with the cache; resolve the host name. */
548 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
553 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
554 if (err != 0 || res == NULL)
557 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
558 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
561 al = address_list_from_addrinfo (res);
565 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
570 if (errno != ETIMEDOUT)
571 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
572 host_errstr (h_errno));
574 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
578 /* Do older systems have h_addr_list? */
579 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
583 /* Print the addresses determined by DNS lookup, but no more than
588 int printmax = al->count <= 3 ? al->count : 3;
589 for (i = 0; i < printmax; i++)
591 logprintf (LOG_VERBOSE, "%s",
592 pretty_print_address (al->addresses + i));
593 if (i < printmax - 1)
594 logputs (LOG_VERBOSE, ", ");
596 if (printmax != al->count)
597 logputs (LOG_VERBOSE, ", ...");
598 logputs (LOG_VERBOSE, "\n");
601 /* Cache the lookup information. */
603 cache_host_lookup (host, al);
608 /* Resolve HOST to get an address for use with bind(2). Do *not* use
609 this for sockets to be used with connect(2).
611 This is a function separate from lookup_host because the results it
612 returns are different -- it uses the AI_PASSIVE flag to
613 getaddrinfo. Because of this distinction, it doesn't store the
614 results in the cache. It prints nothing and implements no timeouts
615 because it should normally only be used with local addresses
616 (typically "localhost" or numeric addresses of different local
619 Without IPv6, this function just calls lookup_host. */
621 struct address_list *
622 lookup_host_passive (const char *host)
625 struct address_list *al = NULL;
627 struct addrinfo hints, *res;
630 hints.ai_socktype = SOCK_STREAM;
631 hints.ai_family = ip_default_family;
632 hints.ai_flags = AI_PASSIVE;
634 err = getaddrinfo (host, NULL, &hints, &res);
635 if (err != 0 || res == NULL)
637 al = address_list_from_addrinfo (res);
641 return lookup_host (host, 1);
645 /* Determine whether a URL is acceptable to be followed, according to
646 a list of domains to accept. */
648 accept_domain (struct url *u)
650 assert (u->host != NULL);
653 if (!sufmatch ((const char **)opt.domains, u->host))
656 if (opt.exclude_domains)
658 if (sufmatch ((const char **)opt.exclude_domains, u->host))
664 /* Check whether WHAT is matched in LIST, each element of LIST being a
665 pattern to match WHAT against, using backward matching (see
666 match_backwards() in utils.c).
668 If an element of LIST matched, 1 is returned, 0 otherwise. */
670 sufmatch (const char **list, const char *what)
675 for (i = 0; list[i]; i++)
677 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
678 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
680 /* The domain must be the first to reach the beginning. */
688 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
690 struct address_list *al;
692 xfree (key); /* host */
694 al = (struct address_list *)value;
695 assert (al->refcount == 1);
696 address_list_delete (al);
704 if (host_name_addresses_map)
706 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
707 hash_table_destroy (host_name_addresses_map);
708 host_name_addresses_map = NULL;