1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 #define NO_ADDRESS NO_DATA
63 #ifdef HAVE_SYS_UTSNAME_H
64 # include <sys/utsname.h>
85 int ip_default_family = AF_UNSPEC;
87 int ip_default_family = AF_INET;
90 /* Mapping between known hosts and lists of their addresses. */
92 static struct hash_table *host_name_addresses_map;
94 /* Lists of addresses. This should eventually be extended to handle
98 int count; /* number of addresses */
99 ip_address *addresses; /* pointer to the string of addresses */
101 int faulty; /* number of addresses known not to work. */
102 int from_cache; /* whether this entry was pulled from
103 cache or freshly looked up. */
105 int refcount; /* reference count; when it drops to
106 0, the entry is freed. */
109 /* Get the bounds of the address list. */
112 address_list_get_bounds (const struct address_list *al, int *start, int *end)
118 /* Return whether this address list entry has been obtained from the
122 address_list_cached_p (const struct address_list *al)
124 return al->from_cache;
127 /* Return a pointer to the address at position POS. */
130 address_list_address_at (const struct address_list *al, int pos)
132 assert (pos >= al->faulty && pos < al->count);
133 return al->addresses + pos;
136 /* Check whether two address lists have all their IPs in common. */
139 address_list_match_all (const struct address_list *al1,
140 const struct address_list *al2)
147 if (al1->count != al2->count)
150 /* For the comparison to be complete, we'd need to sort the IP
151 addresses first. But that's not necessary because this is only
152 used as an optimization. */
155 /* In the non-IPv6 case, there is only one address type, so we can
156 compare the whole array with memcmp. */
157 return 0 == memcmp (al1->addresses, al2->addresses,
158 al1->count * sizeof (ip_address));
159 #else /* ENABLE_IPV6 */
160 for (i = 0; i < al1->count; ++i)
162 const ip_address *ip1 = &al1->addresses[i];
163 const ip_address *ip2 = &al2->addresses[i];
165 if (ip1->type != ip2->type)
171 if (ADDRESS_IPV4_IN_ADDR (ip1).s_addr
172 != ADDRESS_IPV4_IN_ADDR (ip2).s_addr)
176 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
177 if (ADDRESS_IPV6_SCOPE (ip1) != ADDRESS_IPV6_SCOPE (ip2))
179 #endif /* HAVE_SOCKADDR_IN6_SCOPE_ID */
180 if (!IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (ip1),
181 &ADDRESS_IPV6_IN6_ADDR (ip2)))
189 #endif /* ENABLE_IPV6 */
192 /* Mark the INDEXth element of AL as faulty, so that the next time
193 this address list is used, the faulty element will be skipped. */
196 address_list_set_faulty (struct address_list *al, int index)
198 /* We assume that the address list is traversed in order, so that a
199 "faulty" attempt is always preceded by all-faulty addresses,
200 and this is how Wget uses it. */
201 assert (index == al->faulty);
204 if (al->faulty >= al->count)
205 /* All addresses have been proven faulty. Since there's not much
206 sense in returning the user an empty address list the next
207 time, we'll rather make them all clean, so that they can be
214 * address_list_from_addrinfo
216 * This function transforms an addrinfo linked list into an address_list.
219 * addrinfo* Linked list of addrinfo
222 * address_list* Newly allocated address_list
224 static struct address_list *
225 address_list_from_addrinfo (const struct addrinfo *ai)
227 struct address_list *al;
228 const struct addrinfo *ptr;
233 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
234 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
239 al = xnew0 (struct address_list);
240 al->addresses = xnew_array (ip_address, cnt);
245 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
246 if (ptr->ai_family == AF_INET6)
248 const struct sockaddr_in6 *sin6 =
249 (const struct sockaddr_in6 *)ptr->ai_addr;
250 ip->type = IPV6_ADDRESS;
251 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
252 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
253 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
257 else if (ptr->ai_family == AF_INET)
259 const struct sockaddr_in *sin =
260 (const struct sockaddr_in *)ptr->ai_addr;
261 ip->type = IPV4_ADDRESS;
262 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
265 assert (ip - al->addresses == cnt);
269 /* Create an address_list from a NULL-terminated vector of IPv4
270 addresses. This kind of vector is returned by gethostbyname. */
272 static struct address_list *
273 address_list_from_ipv4_addresses (char **h_addr_list)
276 struct address_list *al = xnew0 (struct address_list);
279 while (h_addr_list[count])
283 al->addresses = xnew_array (ip_address, count);
287 for (i = 0; i < count; i++)
289 ip_address *ip = &al->addresses[i];
290 ip->type = IPV4_ADDRESS;
291 memcpy (ADDRESS_IPV4_DATA (ip), h_addr_list[i], 4);
299 address_list_delete (struct address_list *al)
301 xfree (al->addresses);
306 address_list_release (struct address_list *al)
309 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
310 if (al->refcount <= 0)
312 DEBUGP (("Deleting unused %p.\n", al));
313 address_list_delete (al);
317 /* Versions of gethostbyname and getaddrinfo that support timeout. */
321 struct ghbnwt_context {
322 const char *host_name;
323 struct hostent *hptr;
327 gethostbyname_with_timeout_callback (void *arg)
329 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
330 ctx->hptr = gethostbyname (ctx->host_name);
333 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
334 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
335 The function makes sure that when NULL is returned for reasons
336 other than timeout, errno is reset. */
338 static struct hostent *
339 gethostbyname_with_timeout (const char *host_name, double timeout)
341 struct ghbnwt_context ctx;
342 ctx.host_name = host_name;
343 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
345 SET_H_ERRNO (HOST_NOT_FOUND);
354 #else /* ENABLE_IPV6 */
356 struct gaiwt_context {
359 const struct addrinfo *hints;
360 struct addrinfo **res;
365 getaddrinfo_with_timeout_callback (void *arg)
367 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
368 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
371 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
372 In case of timeout, the EAI_SYSTEM error code is returned and errno
373 is set to ETIMEDOUT. */
376 getaddrinfo_with_timeout (const char *node, const char *service,
377 const struct addrinfo *hints, struct addrinfo **res,
380 struct gaiwt_context ctx;
382 ctx.service = service;
386 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
391 return ctx.exit_code;
394 #endif /* ENABLE_IPV6 */
396 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
397 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
401 pretty_print_address (const ip_address *addr)
406 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
410 static char buf[128];
411 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
413 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
415 /* append "%SCOPE_ID" for all ?non-global? addresses */
416 char *p = buf + strlen (buf);
418 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
422 buf[sizeof (buf) - 1] = '\0';
431 /* Add host name HOST with the address ADDR_TEXT to the cache.
432 ADDR_LIST is a NULL-terminated list of addresses, as in struct
436 cache_host_lookup (const char *host, struct address_list *al)
438 if (!host_name_addresses_map)
439 host_name_addresses_map = make_nocase_string_hash_table (0);
442 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
448 debug_logprintf ("Caching %s =>", host);
449 for (i = 0; i < al->count; i++)
450 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
451 debug_logprintf ("\n");
457 forget_host_lookup (const char *host)
459 struct address_list *al = hash_table_get (host_name_addresses_map, host);
462 address_list_release (al);
463 hash_table_remove (host_name_addresses_map, host);
467 struct address_list *
468 lookup_host (const char *host, int flags)
470 struct address_list *al = NULL;
474 struct addrinfo hints, *res;
476 /* Is this necessary? Should this function be changed to accept a
478 if (flags & LH_IPV4_ONLY)
480 else if (flags & LH_IPV6_ONLY)
483 family = ip_default_family;
486 /* First, try to check whether the address is already a numeric
487 address, in which case we don't need to cache it or bother with
488 setting up timeouts. Plus, if memory serves me right, Ultrix's
489 gethostbyname can't handle numeric addresses (!).
491 Where getaddrinfo is available, we do it using the AI_NUMERICHOST
492 flag. Without IPv6, we check whether inet_addr succeeds. */
496 hints.ai_family = family;
497 hints.ai_socktype = SOCK_STREAM;
498 hints.ai_flags = AI_NUMERICHOST;
499 if (flags & LH_PASSIVE)
500 hints.ai_flags = AI_PASSIVE;
502 /* no need to call getaddrinfo_with_timeout here, as we're not
503 * relying on the DNS, but we're only doing an address translation
504 * from presentation (ASCII) to network format */
505 err = getaddrinfo (host, NULL, &hints, &res);
506 if (err == 0 && res != NULL)
508 al = address_list_from_addrinfo (res);
514 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
515 if (addr_ipv4 != (uint32_t) -1)
517 /* The return value of inet_addr is in network byte order, so
518 we can just copy it to IP. */
520 vec[0] = (char *)&addr_ipv4;
522 return address_list_from_ipv4_addresses (vec);
527 /* Then, try to find the host in the cache. */
529 if (host_name_addresses_map)
531 al = hash_table_get (host_name_addresses_map, host);
534 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
541 if (!(flags & LH_SILENT))
542 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
544 /* Host name lookup goes on below. */
549 hints.ai_family = family;
550 hints.ai_socktype = SOCK_STREAM;
551 if (flags & LH_PASSIVE)
552 hints.ai_flags = AI_PASSIVE;
554 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
556 if (err != 0 || res == NULL)
558 if (!(flags & LH_SILENT))
559 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
560 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
563 al = address_list_from_addrinfo (res);
568 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
571 if (!(flags & LH_SILENT))
573 if (errno != ETIMEDOUT)
574 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
576 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
580 assert (hptr->h_length == 4);
581 /* Do older systems have h_addr_list? */
582 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
586 /* Print the addresses determined by DNS lookup, but no more than
588 if (!(flags & LH_SILENT))
591 int printmax = al->count <= 3 ? al->count : 3;
592 for (i = 0; i < printmax; i++)
594 logprintf (LOG_VERBOSE, "%s",
595 pretty_print_address (al->addresses + i));
596 if (i < printmax - 1)
597 logputs (LOG_VERBOSE, ", ");
599 if (printmax != al->count)
600 logputs (LOG_VERBOSE, ", ...");
601 logputs (LOG_VERBOSE, "\n");
604 /* Cache the lookup information. */
606 cache_host_lookup (host, al);
611 /* Determine whether a URL is acceptable to be followed, according to
612 a list of domains to accept. */
614 accept_domain (struct url *u)
616 assert (u->host != NULL);
619 if (!sufmatch ((const char **)opt.domains, u->host))
622 if (opt.exclude_domains)
624 if (sufmatch ((const char **)opt.exclude_domains, u->host))
630 /* Check whether WHAT is matched in LIST, each element of LIST being a
631 pattern to match WHAT against, using backward matching (see
632 match_backwards() in utils.c).
634 If an element of LIST matched, 1 is returned, 0 otherwise. */
636 sufmatch (const char **list, const char *what)
641 for (i = 0; list[i]; i++)
643 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
644 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
646 /* The domain must be the first to reach the beginning. */
653 /* Print error messages for host errors. */
657 /* Can't use switch since some constants are equal (at least on my
658 system), and the compiler signals "duplicate case value". */
659 if (error == HOST_NOT_FOUND
660 || error == NO_RECOVERY
662 || error == NO_ADDRESS
663 || error == TRY_AGAIN)
664 return _("Host not found");
666 return _("Unknown error");
670 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
672 struct address_list *al;
674 xfree (key); /* host */
676 al = (struct address_list *)value;
677 assert (al->refcount == 1);
678 address_list_delete (al);
686 if (host_name_addresses_map)
688 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
689 hash_table_destroy (host_name_addresses_map);
690 host_name_addresses_map = NULL;