1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 # define NO_ADDRESS NO_DATA
81 /* Mapping from known host names to the lists of their addresses. */
83 static struct hash_table *host_name_addresses_map;
86 /* The IP family to request when connecting to remote hosts. This
87 should be moved to an entry in struct options when we implement the
88 --inet4/--inet6 flags. */
89 static int requested_family = AF_UNSPEC;
92 /* Lists of addresses. This should eventually be extended to handle
96 int count; /* number of addresses */
97 ip_address *addresses; /* pointer to the array of addresses */
99 int faulty; /* number of addresses known not to work. */
100 int connected; /* whether we were able to connect to
101 one of the addresses in the list,
104 int refcount; /* reference count; when it drops to
105 0, the entry is freed. */
108 /* Get the bounds of the address list. */
111 address_list_get_bounds (const struct address_list *al, int *start, int *end)
117 /* Return a pointer to the address at position POS.  POS must satisfy
   al->faulty <= POS < al->count; this is enforced with an assertion,
   so positions below the "faulty" mark cannot be requested.  */
120 address_list_address_at (const struct address_list *al, int pos)
122 assert (pos >= al->faulty && pos < al->count);
123 return al->addresses + pos;
126 /* Return 1 if IP is one of the addresses in AL. */
129 address_list_find (const struct address_list *al, const ip_address *ip)
135 for (i = 0; i < al->count; i++)
137 ip_address *cur = al->addresses + i;
138 if (cur->type == IPV4_ADDRESS
139 && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
141 ADDRESS_IPV4_IN_ADDR (ip).s_addr))
147 for (i = 0; i < al->count; i++)
149 ip_address *cur = al->addresses + i;
150 if (cur->type == IPV6_ADDRESS
151 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
152 && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
154 && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
155 &ADDRESS_IPV6_IN6_ADDR (ip)))
159 #endif /* ENABLE_IPV6 */
166 /* Mark the INDEXth element of AL as faulty, so that the next time
167 this address list is used, the faulty element will be skipped. */
170 address_list_set_faulty (struct address_list *al, int index)
172 /* We assume that the address list is traversed in order, so that a
173 "faulty" attempt is always preceded with all-faulty addresses,
174 and this is how Wget uses it. */
175 assert (index == al->faulty);
178 if (al->faulty >= al->count)
179 /* All addresses have been proven faulty. Since there's not much
180 sense in returning the user an empty address list the next
181 time, we'll rather make them all clean, so that they can be
186 /* Set the "connected" flag to true. This flag is used by connect.c to
187 see if the host perhaps needs to be resolved again. */
190 address_list_set_connected (struct address_list *al)
195 /* Return the value of the "connected" flag. */
198 address_list_connected_p (const struct address_list *al)
200 return al->connected;
205 /* Create an address_list from the addresses in the given struct
208 static struct address_list *
209 address_list_from_addrinfo (const struct addrinfo *ai)
211 struct address_list *al;
212 const struct addrinfo *ptr;
217 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
218 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
223 al = xnew0 (struct address_list);
224 al->addresses = xnew_array (ip_address, cnt);
229 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
230 if (ptr->ai_family == AF_INET6)
232 const struct sockaddr_in6 *sin6 =
233 (const struct sockaddr_in6 *)ptr->ai_addr;
234 ip->type = IPV6_ADDRESS;
235 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
236 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
237 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
241 else if (ptr->ai_family == AF_INET)
243 const struct sockaddr_in *sin =
244 (const struct sockaddr_in *)ptr->ai_addr;
245 ip->type = IPV4_ADDRESS;
246 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
249 assert (ip - al->addresses == cnt);
253 #else /* not ENABLE_IPV6 */
255 /* Create an address_list from a NULL-terminated vector of IPv4
256 addresses. This kind of vector is returned by gethostbyname. */
258 static struct address_list *
259 address_list_from_ipv4_addresses (char **vec)
262 struct address_list *al = xnew0 (struct address_list);
269 al->addresses = xnew_array (ip_address, count);
273 for (i = 0; i < count; i++)
275 ip_address *ip = &al->addresses[i];
276 ip->type = IPV4_ADDRESS;
277 memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
283 #endif /* not ENABLE_IPV6 */
286 address_list_delete (struct address_list *al)
288 xfree (al->addresses);
292 /* Mark the address list as being no longer in use. This will reduce
293 its reference count which will cause the list to be freed when the
297 address_list_release (struct address_list *al)
300 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
301 if (al->refcount <= 0)
303 DEBUGP (("Deleting unused %p.\n", al));
304 address_list_delete (al);
308 /* Versions of gethostbyname and getaddrinfo that support timeout. */
312 struct ghbnwt_context {
313 const char *host_name;
314 struct hostent *hptr;
/* Callback run through run_with_timeout.  ARG points to a struct
   ghbnwt_context; the host named by its host_name member is resolved
   with gethostbyname and the result is stored in its hptr member.  */
318 gethostbyname_with_timeout_callback (void *arg)
320 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
321 ctx->hptr = gethostbyname (ctx->host_name);
324 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
325 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
326 The function makes sure that when NULL is returned for reasons
327 other than timeout, errno is reset. */
329 static struct hostent *
330 gethostbyname_with_timeout (const char *host_name, double timeout)
332 struct ghbnwt_context ctx;
333 ctx.host_name = host_name;
334 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
336 SET_H_ERRNO (HOST_NOT_FOUND);
345 /* Print error messages for host errors. */
347 host_errstr (int error)
349 /* Can't use switch since some of these constants can be equal,
350 which makes the compiler complain about duplicate case
352 if (error == HOST_NOT_FOUND
353 || error == NO_RECOVERY
355 || error == NO_ADDRESS)
356 return _("Unknown host");
357 else if (error == TRY_AGAIN)
358 /* Message modeled after what gai_strerror returns in similar
360 return _("Temporary failure in name resolution");
362 return _("Unknown error");
365 #else /* ENABLE_IPV6 */
367 struct gaiwt_context {
370 const struct addrinfo *hints;
371 struct addrinfo **res;
/* Callback run through run_with_timeout.  ARG points to a struct
   gaiwt_context; getaddrinfo is called with its node, service, hints
   and res members, and the return value is stored in exit_code.  */
376 getaddrinfo_with_timeout_callback (void *arg)
378 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
379 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
382 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
383 In case of timeout, the EAI_SYSTEM error code is returned and errno
384 is set to ETIMEDOUT. */
387 getaddrinfo_with_timeout (const char *node, const char *service,
388 const struct addrinfo *hints, struct addrinfo **res,
391 struct gaiwt_context ctx;
393 ctx.service = service;
397 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
402 return ctx.exit_code;
405 #endif /* ENABLE_IPV6 */
407 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
408 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
412 pretty_print_address (const ip_address *addr)
417 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
421 static char buf[128];
422 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
424 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
426 /* append "%SCOPE_ID" for all ?non-global? addresses */
427 char *p = buf + strlen (buf);
429 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
433 buf[sizeof (buf) - 1] = '\0';
442 /* Add host name HOST with the address ADDR_TEXT to the cache.
443 ADDR_LIST is a NULL-terminated list of addresses, as in struct
447 cache_host_lookup (const char *host, struct address_list *al)
449 if (!host_name_addresses_map)
450 host_name_addresses_map = make_nocase_string_hash_table (0);
453 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
459 debug_logprintf ("Caching %s =>", host);
460 for (i = 0; i < al->count; i++)
461 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
462 debug_logprintf ("\n");
467 /* Remove HOST from Wget's DNS cache. Does nothing if HOST is not in
471 forget_host_lookup (const char *host)
473 struct address_list *al = hash_table_get (host_name_addresses_map, host);
476 address_list_release (al);
477 hash_table_remove (host_name_addresses_map, host);
481 /* Look up HOST in DNS and return a list of IP addresses.
483 This function caches its result so that, if the same host is passed
484 the second time, the addresses are returned without the DNS lookup.
485 If you want to force lookup, call forget_host_lookup() prior to
486 this function, or set opt.dns_cache to 0 to globally disable
489 If SILENT is non-zero, progress messages are not printed. */
491 struct address_list *
492 lookup_host (const char *host, int silent)
494 struct address_list *al = NULL;
497 /* If we're not using getaddrinfo, first check if HOST names a
498 numeric IPv4 address. gethostbyname is not required to accept
499 dotted-decimal IPv4 addresses, and some older implementations
500 (e.g. the Ultrix one) indeed didn't. */
502 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
503 if (addr_ipv4 != (uint32_t) -1)
505 /* No need to cache host->addr relation, just return the
508 vec[0] = (char *)&addr_ipv4;
510 return address_list_from_ipv4_addresses (vec);
515 /* Try to find the host in the cache. */
517 if (host_name_addresses_map)
519 al = hash_table_get (host_name_addresses_map, host);
522 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
528 /* No luck with the cache; resolve the host name. */
531 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
536 struct addrinfo hints, *res;
539 hints.ai_socktype = SOCK_STREAM;
540 hints.ai_family = requested_family;
543 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
544 if (err != 0 || res == NULL)
547 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
548 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
551 al = address_list_from_addrinfo (res);
556 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
561 if (errno != ETIMEDOUT)
562 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
563 host_errstr (h_errno));
565 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
569 /* Do older systems have h_addr_list? */
570 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
574 /* Print the addresses determined by DNS lookup, but no more than
579 int printmax = al->count <= 3 ? al->count : 3;
580 for (i = 0; i < printmax; i++)
582 logprintf (LOG_VERBOSE, "%s",
583 pretty_print_address (al->addresses + i));
584 if (i < printmax - 1)
585 logputs (LOG_VERBOSE, ", ");
587 if (printmax != al->count)
588 logputs (LOG_VERBOSE, ", ...");
589 logputs (LOG_VERBOSE, "\n");
592 /* Cache the lookup information. */
594 cache_host_lookup (host, al);
599 /* Resolve HOST to get an address for use with bind(2). Do *not* use
600 this for sockets to be used with connect(2).
602 This is a function separate from lookup_host because the results it
603 returns are different -- it uses the AI_PASSIVE flag to
604 getaddrinfo. Because of this distinction, it doesn't store the
605 results in the cache. It prints nothing and implements no timeouts
606 because it should normally only be used with local addresses
607 (typically "localhost" or numeric addresses of different local
610 Without IPv6, this function just calls lookup_host. */
612 struct address_list *
613 lookup_host_passive (const char *host)
616 struct address_list *al = NULL;
618 struct addrinfo hints, *res;
621 hints.ai_socktype = SOCK_STREAM;
622 hints.ai_family = requested_family;
623 hints.ai_flags = AI_PASSIVE;
625 err = getaddrinfo (host, NULL, &hints, &res);
626 if (err != 0 || res == NULL)
628 al = address_list_from_addrinfo (res);
632 return lookup_host (host, 1);
636 /* Determine whether a URL is acceptable to be followed, according to
637 a list of domains to accept. */
639 accept_domain (struct url *u)
641 assert (u->host != NULL);
644 if (!sufmatch ((const char **)opt.domains, u->host))
647 if (opt.exclude_domains)
649 if (sufmatch ((const char **)opt.exclude_domains, u->host))
655 /* Check whether WHAT is matched in LIST, each element of LIST being a
656 pattern to match WHAT against, using backward matching (see
657 match_backwards() in utils.c).
659 If an element of LIST matched, 1 is returned, 0 otherwise. */
661 sufmatch (const char **list, const char *what)
666 for (i = 0; list[i]; i++)
/* Walk LIST[i] and WHAT backwards in lockstep, comparing characters
   case-insensitively; J starts at the terminating NUL of LIST[i].  */
668 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
669 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
671 /* The domain must be the first to reach the beginning. */
/* Callback passed to hash_table_map when the host cache is torn down.
   Frees KEY (the host name string) and deletes the address list held
   in VALUE; the third argument is ignored (NULL is passed for it).
   The assertion documents the expectation that the list's reference
   count is exactly 1 at this point.  */
679 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
681 struct address_list *al;
683 xfree (key); /* host */
685 al = (struct address_list *)value;
686 assert (al->refcount == 1);
687 address_list_delete (al);
695 if (host_name_addresses_map)
697 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
698 hash_table_destroy (host_name_addresses_map);
699 host_name_addresses_map = NULL;