1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 # define NO_ADDRESS NO_DATA
81 /* Mapping between known hosts and lists of their addresses. */
83 static struct hash_table *host_name_addresses_map;
85 /* Lists of addresses. This should eventually be extended to handle
89 int count; /* number of addresses */
90 ip_address *addresses; /* pointer to the string of addresses */
92 int faulty; /* number of addresses known not to work. */
93 int from_cache; /* whether this entry was pulled from
94 cache or freshly looked up. */
96 int refcount; /* reference count; when it drops to
97 0, the entry is freed. */
100 /* Get the bounds of the address list. */
103 address_list_get_bounds (const struct address_list *al, int *start, int *end)
109 /* Return whether this address list entry has been obtained from the
113 address_list_cached_p (const struct address_list *al)
115 return al->from_cache;
118 /* Return a pointer to the address at position POS. */
121 address_list_address_at (const struct address_list *al, int pos)
123 assert (pos >= al->faulty && pos < al->count);
124 return al->addresses + pos;
127 /* Check whether two address lists have all their IPs in common. */
130 address_list_match_all (const struct address_list *al1,
131 const struct address_list *al2)
138 if (al1->count != al2->count)
141 /* For the comparison to be complete, we'd need to sort the IP
142 addresses first. But that's not necessary because this is only
143 used as an optimization. */
146 /* In the non-IPv6 case, there is only one address type, so we can
147 compare the whole array with memcmp. */
148 return 0 == memcmp (al1->addresses, al2->addresses,
149 al1->count * sizeof (ip_address));
150 #else /* ENABLE_IPV6 */
151 for (i = 0; i < al1->count; ++i)
153 const ip_address *ip1 = &al1->addresses[i];
154 const ip_address *ip2 = &al2->addresses[i];
156 if (ip1->type != ip2->type)
162 if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr
164 ADDRESS_IPV4_IN_ADDR (ip2).s_addr)
168 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
169 if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2))
171 #endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
172 if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1),
173 &ADDRESS_IPV6_IN6_ADDR (ip2)))
181 #endif /* ENABLE_IPV6 */
184 /* Mark the INDEXth element of AL as faulty, so that the next time
185 this address list is used, the faulty element will be skipped. */
188 address_list_set_faulty (struct address_list *al, int index)
190 /* We assume that the address list is traversed in order, so that a
191 "faulty" attempt is always preceded by all-faulty addresses,
192 and this is how Wget uses it. */
193 assert (index == al->faulty);
196 if (al->faulty >= al->count)
197 /* All addresses have been proven faulty. Since there's not much
198 sense in returning the user an empty address list the next
199 time, we'll rather make them all clean, so that they can be
206 * address_list_from_addrinfo
208 * This function transforms an addrinfo linked list into an address_list.
211 * addrinfo* Linked list of addrinfo
214 * address_list* Newly allocated address_list
216 static struct address_list *
217 address_list_from_addrinfo (const struct addrinfo *ai)
219 struct address_list *al;
220 const struct addrinfo *ptr;
225 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
226 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
231 al = xnew0 (struct address_list);
232 al->addresses = xnew_array (ip_address, cnt);
237 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
238 if (ptr->ai_family == AF_INET6)
240 const struct sockaddr_in6 *sin6 =
241 (const struct sockaddr_in6 *)ptr->ai_addr;
242 ip->type = IPV6_ADDRESS;
243 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
244 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
245 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
249 else if (ptr->ai_family == AF_INET)
251 const struct sockaddr_in *sin =
252 (const struct sockaddr_in *)ptr->ai_addr;
253 ip->type = IPV4_ADDRESS;
254 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
257 assert (ip - al->addresses == cnt);
261 /* Create an address_list from a NULL-terminated vector of IPv4
262 addresses. This kind of vector is returned by gethostbyname. */
264 static struct address_list *
265 address_list_from_ipv4_addresses (char **vec)
268 struct address_list *al = xnew0 (struct address_list);
275 al->addresses = xnew_array (ip_address, count);
279 for (i = 0; i < count; i++)
281 ip_address *ip = &al->addresses[i];
282 ip->type = IPV4_ADDRESS;
283 memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
291 address_list_delete (struct address_list *al)
293 xfree (al->addresses);
298 address_list_release (struct address_list *al)
301 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
302 if (al->refcount <= 0)
304 DEBUGP (("Deleting unused %p.\n", al));
305 address_list_delete (al);
309 /* Versions of gethostbyname and getaddrinfo that support timeout. */
313 struct ghbnwt_context {
314 const char *host_name;
315 struct hostent *hptr;
319 gethostbyname_with_timeout_callback (void *arg)
321 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
322 ctx->hptr = gethostbyname (ctx->host_name);
325 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
326 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
327 The function makes sure that when NULL is returned for reasons
328 other than timeout, errno is reset. */
330 static struct hostent *
331 gethostbyname_with_timeout (const char *host_name, double timeout)
333 struct ghbnwt_context ctx;
334 ctx.host_name = host_name;
335 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
337 SET_H_ERRNO (HOST_NOT_FOUND);
346 #else /* ENABLE_IPV6 */
348 struct gaiwt_context {
351 const struct addrinfo *hints;
352 struct addrinfo **res;
357 getaddrinfo_with_timeout_callback (void *arg)
359 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
360 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
363 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
364 In case of timeout, the EAI_SYSTEM error code is returned and errno
365 is set to ETIMEDOUT. */
368 getaddrinfo_with_timeout (const char *node, const char *service,
369 const struct addrinfo *hints, struct addrinfo **res,
372 struct gaiwt_context ctx;
374 ctx.service = service;
378 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
383 return ctx.exit_code;
386 #endif /* ENABLE_IPV6 */
388 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
389 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
393 pretty_print_address (const ip_address *addr)
398 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
402 static char buf[128];
403 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
405 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
407 /* append "%SCOPE_ID" for all ?non-global? addresses */
408 char *p = buf + strlen (buf);
410 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
414 buf[sizeof (buf) - 1] = '\0';
423 /* Add host name HOST with the address ADDR_TEXT to the cache.
424 ADDR_LIST is a NULL-terminated list of addresses, as in struct
428 cache_host_lookup (const char *host, struct address_list *al)
430 if (!host_name_addresses_map)
431 host_name_addresses_map = make_nocase_string_hash_table (0);
434 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
440 debug_logprintf ("Caching %s =>", host);
441 for (i = 0; i < al->count; i++)
442 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
443 debug_logprintf ("\n");
448 /* Remove HOST from Wget's DNS cache. Does nothing if HOST is not in
452 forget_host_lookup (const char *host)
454 struct address_list *al = hash_table_get (host_name_addresses_map, host);
457 address_list_release (al);
458 hash_table_remove (host_name_addresses_map, host);
462 /* Look up HOST in DNS and return a list of IP addresses.
464 This function caches its result so that, if the same host is passed
465 the second time, the addresses are returned without the DNS lookup.
466 If you want to force lookup, call forget_host_lookup() prior to
467 this function, or set opt.dns_cache to 0 to globally disable
470 FLAGS can be a combination of:
471 LH_SILENT - don't print the "resolving ... done" message.
472 LH_IPV4_ONLY - return only IPv4 addresses.
473 LH_IPV6_ONLY - return only IPv6 addresses. */
475 struct address_list *
476 lookup_host (const char *host, int flags)
478 struct address_list *al = NULL;
482 struct addrinfo hints, *res;
485 hints.ai_socktype = SOCK_STREAM;
487 /* Should we inspect opt.<something> directly? */
488 if (flags & LH_IPV4_ONLY)
489 hints.ai_family = AF_INET;
490 else if (flags & LH_IPV6_ONLY)
491 hints.ai_family = AF_INET6;
493 hints.ai_family = AF_UNSPEC;
496 /* First, try to check whether the address is already a numeric
497 address, in which case we don't want to cache it or bother with
498 setting up timeouts. Plus, old (e.g. Ultrix) implementations of
499 gethostbyname can't handle numeric addresses (!).
501 Where getaddrinfo is available, we do it using the AI_NUMERICHOST
502 flag. Without IPv6, we use inet_addr. */
505 hints.ai_flags = AI_NUMERICHOST;
506 if (flags & LH_PASSIVE)
507 hints.ai_flags |= AI_PASSIVE;
509 /* No need to specify timeout, as we're not resolving HOST, but
510 merely translating it from the presentation (ASCII) to network
512 err = getaddrinfo (host, NULL, &hints, &res);
513 if (err == 0 && res != NULL)
515 al = address_list_from_addrinfo (res);
521 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
522 if (addr_ipv4 != (uint32_t) -1)
524 /* The return value of inet_addr is in network byte order, so
525 we can just copy it to IP. */
527 vec[0] = (char *)&addr_ipv4;
529 return address_list_from_ipv4_addresses (vec);
534 /* Then, try to find the host in the cache. */
536 if (host_name_addresses_map)
538 al = hash_table_get (host_name_addresses_map, host);
541 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
548 if (!(flags & LH_SILENT))
549 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
551 /* Host name lookup goes on below. */
555 if (flags & LH_PASSIVE)
556 hints.ai_flags |= AI_PASSIVE;
558 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
559 if (err != 0 || res == NULL)
561 if (!(flags & LH_SILENT))
562 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
563 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
566 al = address_list_from_addrinfo (res);
570 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
573 if (!(flags & LH_SILENT))
575 if (errno != ETIMEDOUT)
576 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
578 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
582 assert (hptr->h_length == 4);
583 /* Do older systems have h_addr_list? */
584 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
588 /* Print the addresses determined by DNS lookup, but no more than
590 if (!(flags & LH_SILENT))
593 int printmax = al->count <= 3 ? al->count : 3;
594 for (i = 0; i < printmax; i++)
596 logprintf (LOG_VERBOSE, "%s",
597 pretty_print_address (al->addresses + i));
598 if (i < printmax - 1)
599 logputs (LOG_VERBOSE, ", ");
601 if (printmax != al->count)
602 logputs (LOG_VERBOSE, ", ...");
603 logputs (LOG_VERBOSE, "\n");
606 /* Cache the lookup information. */
608 cache_host_lookup (host, al);
613 /* Determine whether a URL is acceptable to be followed, according to
614 a list of domains to accept. */
616 accept_domain (struct url *u)
618 assert (u->host != NULL);
621 if (!sufmatch ((const char **)opt.domains, u->host))
624 if (opt.exclude_domains)
626 if (sufmatch ((const char **)opt.exclude_domains, u->host))
632 /* Check whether WHAT is matched in LIST, each element of LIST being a
633 pattern to match WHAT against, using backward matching (see
634 match_backwards() in utils.c).
636 If an element of LIST matched, 1 is returned, 0 otherwise. */
638 sufmatch (const char **list, const char *what)
643 for (i = 0; list[i]; i++)
645 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
646 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
648 /* The domain must be the first to reach the beginning. */
655 /* Print error messages for host errors. */
659 /* Can't use switch since some constants are equal (at least on my
660 system), and the compiler signals "duplicate case value". */
661 if (error == HOST_NOT_FOUND
662 || error == NO_RECOVERY
664 || error == NO_ADDRESS
665 || error == TRY_AGAIN)
666 return _("Host not found");
668 return _("Unknown error");
672 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
674 struct address_list *al;
676 xfree (key); /* host */
678 al = (struct address_list *)value;
679 assert (al->refcount == 1);
680 address_list_delete (al);
688 if (host_name_addresses_map)
690 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
691 hash_table_destroy (host_name_addresses_map);
692 host_name_addresses_map = NULL;