1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 # define NO_ADDRESS NO_DATA
81 /* Mapping between known hosts and the lists of their addresses. */
83 static struct hash_table *host_name_addresses_map;
86 /* The IP family to request when connecting to remote hosts. This
87 should be moved to an entry in struct options when we implement the
88 --inet4/--inet6 flags. */
89 static int requested_family = AF_UNSPEC;
92 /* Lists of addresses. This should eventually be extended to handle
96 int count; /* number of addresses */
97 ip_address *addresses; /* pointer to the string of addresses */
99 int faulty; /* number of addresses known not to work. */
100 int connected; /* whether we were able to connect to
101 one of the addresses in the list,
104 int refcount; /* reference count; when it drops to
105 0, the entry is freed. */
108 /* Get the bounds of the address list. */
111 address_list_get_bounds (const struct address_list *al, int *start, int *end)
117 /* Return a pointer to the address at position POS. */
120 address_list_address_at (const struct address_list *al, int pos)
122 assert (pos >= al->faulty && pos < al->count);
123 return al->addresses + pos;
126 /* Return 1 if IP is one of the addresses in AL. */
129 address_list_find (const struct address_list *al, const ip_address *ip)
135 for (i = 0; i < al->count; i++)
137 ip_address *cur = al->addresses + i;
138 if (cur->type == IPV4_ADDRESS
139 && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
141 ADDRESS_IPV4_IN_ADDR (ip).s_addr))
147 for (i = 0; i < al->count; i++)
149 ip_address *cur = al->addresses + i;
150 if (cur->type == IPV6_ADDRESS
151 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
152 && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
154 && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
155 &ADDRESS_IPV6_IN6_ADDR (ip)))
159 #endif /* ENABLE_IPV6 */
166 /* Mark the INDEXth element of AL as faulty, so that the next time
167 this address list is used, the faulty element will be skipped. */
170 address_list_set_faulty (struct address_list *al, int index)
172 /* We assume that the address list is traversed in order, so that a
173 "faulty" attempt is always preceded with all-faulty addresses,
174 and this is how Wget uses it. */
175 assert (index == al->faulty);
178 if (al->faulty >= al->count)
179 /* All addresses have been proven faulty. Since there's not much
180 sense in returning the user an empty address list the next
181 time, we'll rather make them all clean, so that they can be
186 /* Set the "connected" flag to true. This flag is used by connect.c to
187 see if the host perhaps needs to be resolved again. */
190 address_list_set_connected (struct address_list *al)
195 /* Return the value of the "connected" flag. */
198 address_list_connected_p (const struct address_list *al)
200 return al->connected;
205 /* Create an address_list from the addresses in the given struct
208 static struct address_list *
209 address_list_from_addrinfo (const struct addrinfo *ai)
211 struct address_list *al;
212 const struct addrinfo *ptr;
217 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
218 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
223 al = xnew0 (struct address_list);
224 al->addresses = xnew_array (ip_address, cnt);
229 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
230 if (ptr->ai_family == AF_INET6)
232 const struct sockaddr_in6 *sin6 =
233 (const struct sockaddr_in6 *)ptr->ai_addr;
234 ip->type = IPV6_ADDRESS;
235 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
236 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
237 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
241 else if (ptr->ai_family == AF_INET)
243 const struct sockaddr_in *sin =
244 (const struct sockaddr_in *)ptr->ai_addr;
245 ip->type = IPV4_ADDRESS;
246 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
249 assert (ip - al->addresses == cnt);
253 #else /* not ENABLE_IPV6 */
255 /* Create an address_list from a NULL-terminated vector of IPv4
256 addresses. This kind of vector is returned by gethostbyname. */
258 static struct address_list *
259 address_list_from_ipv4_addresses (char **vec)
262 struct address_list *al = xnew0 (struct address_list);
269 al->addresses = xnew_array (ip_address, count);
273 for (i = 0; i < count; i++)
275 ip_address *ip = &al->addresses[i];
276 ip->type = IPV4_ADDRESS;
277 memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
283 #endif /* not ENABLE_IPV6 */
286 address_list_delete (struct address_list *al)
288 xfree (al->addresses);
292 /* Mark the address list as being no longer in use. This will reduce
293 its reference count which will cause the list to be freed when the
297 address_list_release (struct address_list *al)
300 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
301 if (al->refcount <= 0)
303 DEBUGP (("Deleting unused %p.\n", al));
304 address_list_delete (al);
308 /* Versions of gethostbyname and getaddrinfo that support timeout. */
312 struct ghbnwt_context {
313 const char *host_name;
314 struct hostent *hptr;
318 gethostbyname_with_timeout_callback (void *arg)
320 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
321 ctx->hptr = gethostbyname (ctx->host_name);
324 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
325 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
326 The function makes sure that when NULL is returned for reasons
327 other than timeout, errno is reset. */
329 static struct hostent *
330 gethostbyname_with_timeout (const char *host_name, double timeout)
332 struct ghbnwt_context ctx;
333 ctx.host_name = host_name;
334 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
336 SET_H_ERRNO (HOST_NOT_FOUND);
345 /* Print error messages for host errors. */
347 host_errstr (int error)
349 /* Can't use switch since some of these constants can be equal,
350 which makes the compiler complain about duplicate case
352 if (error == HOST_NOT_FOUND
353 || error == NO_RECOVERY
355 || error == NO_ADDRESS)
356 return _("Unknown host");
357 else if (error == TRY_AGAIN)
358 /* Message modeled after what gai_strerror returns in similar
360 return _("Temporary failure in name resolution");
362 return _("Unknown error");
365 #else /* ENABLE_IPV6 */
367 struct gaiwt_context {
370 const struct addrinfo *hints;
371 struct addrinfo **res;
376 getaddrinfo_with_timeout_callback (void *arg)
378 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
379 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
382 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
383 In case of timeout, the EAI_SYSTEM error code is returned and errno
384 is set to ETIMEDOUT. */
387 getaddrinfo_with_timeout (const char *node, const char *service,
388 const struct addrinfo *hints, struct addrinfo **res,
391 struct gaiwt_context ctx;
393 ctx.service = service;
397 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
402 return ctx.exit_code;
405 #endif /* ENABLE_IPV6 */
407 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
408 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
412 pretty_print_address (const ip_address *addr)
417 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
421 static char buf[128];
422 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
424 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
426 /* append "%SCOPE_ID" for all ?non-global? addresses */
427 char *p = buf + strlen (buf);
429 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
433 buf[sizeof (buf) - 1] = '\0';
442 /* Add host name HOST with the address ADDR_TEXT to the cache.
443 ADDR_LIST is a NULL-terminated list of addresses, as in struct
447 cache_host_lookup (const char *host, struct address_list *al)
449 if (!host_name_addresses_map)
450 host_name_addresses_map = make_nocase_string_hash_table (0);
453 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
459 debug_logprintf ("Caching %s =>", host);
460 for (i = 0; i < al->count; i++)
461 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
462 debug_logprintf ("\n");
467 /* Remove HOST from Wget's DNS cache. Does nothing if HOST is not in
471 forget_host_lookup (const char *host)
473 struct address_list *al = hash_table_get (host_name_addresses_map, host);
476 address_list_release (al);
477 hash_table_remove (host_name_addresses_map, host);
481 /* Look up HOST in DNS and return a list of IP addresses.
483 This function caches its result so that, if the same host is passed
484 the second time, the addresses are returned without the DNS lookup.
485 If you want to force lookup, call forget_host_lookup() prior to
486 this function, or set opt.dns_cache to 0 to globally disable
489 If SILENT is non-zero, progress messages are not printed. */
491 struct address_list *
492 lookup_host (const char *host, int silent)
494 struct address_list *al = NULL;
496 /* If we're not using getaddrinfo, first check if HOST names a
497 numeric IPv4 address. This was necessary under old (e.g. Ultrix)
498 implementations of gethostbyname that couldn't handle numeric
499 addresses (!). This is not done under IPv6 because getaddrinfo
500 always handles numeric addresses. */
503 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
504 if (addr_ipv4 != (uint32_t) -1)
506 /* No need to cache host->addr relation, just return the
509 vec[0] = (char *)&addr_ipv4;
511 return address_list_from_ipv4_addresses (vec);
516 /* Try to find the host in the cache. */
518 if (host_name_addresses_map)
520 al = hash_table_get (host_name_addresses_map, host);
523 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
529 /* No luck with the cache; resolve the host name. */
532 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
537 struct addrinfo hints, *res;
540 hints.ai_socktype = SOCK_STREAM;
541 hints.ai_family = requested_family;
544 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
545 if (err != 0 || res == NULL)
548 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
549 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
552 al = address_list_from_addrinfo (res);
557 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
562 if (errno != ETIMEDOUT)
563 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
564 host_errstr (h_errno));
566 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
570 /* Do older systems have h_addr_list? */
571 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
575 /* Print the addresses determined by DNS lookup, but no more than
580 int printmax = al->count <= 3 ? al->count : 3;
581 for (i = 0; i < printmax; i++)
583 logprintf (LOG_VERBOSE, "%s",
584 pretty_print_address (al->addresses + i));
585 if (i < printmax - 1)
586 logputs (LOG_VERBOSE, ", ");
588 if (printmax != al->count)
589 logputs (LOG_VERBOSE, ", ...");
590 logputs (LOG_VERBOSE, "\n");
593 /* Cache the lookup information. */
595 cache_host_lookup (host, al);
600 /* Resolve HOST to get an address for use with bind(2). Do *not* use
601 this for sockets to be used with connect(2).
603 This is a function separate from lookup_host because the results it
604 returns are different -- it uses the AI_PASSIVE flag to
605 getaddrinfo. Because of this distinction, it doesn't store the
606 results in the cache. It prints nothing and implements no timeouts
607 because it should normally only be used with local addresses
608 (typically "localhost" or numeric addresses of different local
611 Without IPv6, this function just calls lookup_host. */
613 struct address_list *
614 lookup_host_passive (const char *host)
617 struct address_list *al = NULL;
619 struct addrinfo hints, *res;
622 hints.ai_socktype = SOCK_STREAM;
623 hints.ai_family = requested_family;
624 hints.ai_flags = AI_PASSIVE;
626 err = getaddrinfo (host, NULL, &hints, &res);
627 if (err != 0 || res == NULL)
629 al = address_list_from_addrinfo (res);
633 return lookup_host (host, 1);
637 /* Determine whether a URL is acceptable to be followed, according to
638 a list of domains to accept. */
640 accept_domain (struct url *u)
642 assert (u->host != NULL);
645 if (!sufmatch ((const char **)opt.domains, u->host))
648 if (opt.exclude_domains)
650 if (sufmatch ((const char **)opt.exclude_domains, u->host))
656 /* Check whether WHAT is matched in LIST, each element of LIST being a
657 pattern to match WHAT against, using backward matching (see
658 match_backwards() in utils.c).
660 If an element of LIST matched, 1 is returned, 0 otherwise. */
662 sufmatch (const char **list, const char *what)
667 for (i = 0; list[i]; i++)
669 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
670 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
672 /* The domain must be the first to reach the beginning. */
680 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
682 struct address_list *al;
684 xfree (key); /* host */
686 al = (struct address_list *)value;
687 assert (al->refcount == 1);
688 address_list_delete (al);
696 if (host_name_addresses_map)
698 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
699 hash_table_destroy (host_name_addresses_map);
700 host_name_addresses_map = NULL;