1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
44 #include <sys/types.h>
48 # define SET_H_ERRNO(err) WSASetLastError (err)
50 # include <sys/socket.h>
51 # include <netinet/in.h>
53 # include <arpa/inet.h>
56 # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60 # define NO_ADDRESS NO_DATA
81 /* Mapping between known hosts and lists of their addresses. */
83 static struct hash_table *host_name_addresses_map;
85 /* Lists of IP addresses that result from running DNS queries. See
86 lookup_host for details. */
89 int count; /* number of addresses */
90 ip_address *addresses; /* pointer to the string of addresses */
92 int faulty; /* number of addresses known not to work. */
93 int connected; /* whether we were able to connect to
94 one of the addresses in the list,
97 int refcount; /* reference count; when it drops to
98 0, the entry is freed. */
101 /* Get the bounds of the address list. */
104 address_list_get_bounds (const struct address_list *al, int *start, int *end)
110 /* Return a pointer to the address at position POS. */
113 address_list_address_at (const struct address_list *al, int pos)
115 assert (pos >= al->faulty && pos < al->count);
116 return al->addresses + pos;
119 /* Return 1 if IP is one of the addresses in AL. */
122 address_list_find (const struct address_list *al, const ip_address *ip)
128 for (i = 0; i < al->count; i++)
130 ip_address *cur = al->addresses + i;
131 if (cur->type == IPV4_ADDRESS
132 && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
134 ADDRESS_IPV4_IN_ADDR (ip).s_addr))
140 for (i = 0; i < al->count; i++)
142 ip_address *cur = al->addresses + i;
143 if (cur->type == IPV6_ADDRESS
144 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
145 && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
147 && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
148 &ADDRESS_IPV6_IN6_ADDR (ip)))
152 #endif /* ENABLE_IPV6 */
159 /* Mark the INDEXth element of AL as faulty, so that the next time
160 this address list is used, the faulty element will be skipped. */
163 address_list_set_faulty (struct address_list *al, int index)
165 /* We assume that the address list is traversed in order, so that a
166 "faulty" attempt is always preceded with all-faulty addresses,
167 and this is how Wget uses it. */
168 assert (index == al->faulty);
171 if (al->faulty >= al->count)
172 /* All addresses have been proven faulty. Since there's not much
173 sense in returning the user an empty address list the next
174 time, we'll rather make them all clean, so that they can be
179 /* Set the "connected" flag to true. This flag is used by connect.c to
180 see if the host perhaps needs to be resolved again. */
183 address_list_set_connected (struct address_list *al)
188 /* Return the value of the "connected" flag. */
191 address_list_connected_p (const struct address_list *al)
193 return al->connected;
198 /* Create an address_list from the addresses in the given struct
201 static struct address_list *
202 address_list_from_addrinfo (const struct addrinfo *ai)
204 struct address_list *al;
205 const struct addrinfo *ptr;
210 for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
211 if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
216 al = xnew0 (struct address_list);
217 al->addresses = xnew_array (ip_address, cnt);
222 for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
223 if (ptr->ai_family == AF_INET6)
225 const struct sockaddr_in6 *sin6 =
226 (const struct sockaddr_in6 *)ptr->ai_addr;
227 ip->type = IPV6_ADDRESS;
228 ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
229 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
230 ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
234 else if (ptr->ai_family == AF_INET)
236 const struct sockaddr_in *sin =
237 (const struct sockaddr_in *)ptr->ai_addr;
238 ip->type = IPV4_ADDRESS;
239 ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
242 assert (ip - al->addresses == cnt);
246 #else /* not ENABLE_IPV6 */
248 /* Create an address_list from a NULL-terminated vector of IPv4
249 addresses. This kind of vector is returned by gethostbyname. */
251 static struct address_list *
252 address_list_from_ipv4_addresses (char **vec)
255 struct address_list *al = xnew0 (struct address_list);
262 al->addresses = xnew_array (ip_address, count);
266 for (i = 0; i < count; i++)
268 ip_address *ip = &al->addresses[i];
269 ip->type = IPV4_ADDRESS;
270 memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
276 #endif /* not ENABLE_IPV6 */
279 address_list_delete (struct address_list *al)
281 xfree (al->addresses);
285 /* Mark the address list as being no longer in use. This will reduce
286 its reference count which will cause the list to be freed when the
290 address_list_release (struct address_list *al)
293 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
294 if (al->refcount <= 0)
296 DEBUGP (("Deleting unused %p.\n", al));
297 address_list_delete (al);
301 /* Versions of gethostbyname and getaddrinfo that support timeout. */
305 struct ghbnwt_context {
306 const char *host_name;
307 struct hostent *hptr;
311 gethostbyname_with_timeout_callback (void *arg)
313 struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
314 ctx->hptr = gethostbyname (ctx->host_name);
317 /* Just like gethostbyname, except it times out after TIMEOUT seconds.
318 In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
319 The function makes sure that when NULL is returned for reasons
320 other than timeout, errno is reset. */
322 static struct hostent *
323 gethostbyname_with_timeout (const char *host_name, double timeout)
325 struct ghbnwt_context ctx;
326 ctx.host_name = host_name;
327 if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
329 SET_H_ERRNO (HOST_NOT_FOUND);
338 /* Print error messages for host errors. */
340 host_errstr (int error)
342 /* Can't use switch since some of these constants can be equal,
343 which makes the compiler complain about duplicate case
345 if (error == HOST_NOT_FOUND
346 || error == NO_RECOVERY
348 || error == NO_ADDRESS)
349 return _("Unknown host");
350 else if (error == TRY_AGAIN)
351 /* Message modeled after what gai_strerror returns in similar
353 return _("Temporary failure in name resolution");
355 return _("Unknown error");
358 #else /* ENABLE_IPV6 */
360 struct gaiwt_context {
363 const struct addrinfo *hints;
364 struct addrinfo **res;
369 getaddrinfo_with_timeout_callback (void *arg)
371 struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
372 ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
375 /* Just like getaddrinfo, except it times out after TIMEOUT seconds.
376 In case of timeout, the EAI_SYSTEM error code is returned and errno
377 is set to ETIMEDOUT. */
380 getaddrinfo_with_timeout (const char *node, const char *service,
381 const struct addrinfo *hints, struct addrinfo **res,
384 struct gaiwt_context ctx;
386 ctx.service = service;
390 if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
395 return ctx.exit_code;
398 #endif /* ENABLE_IPV6 */
400 /* Pretty-print ADDR. When compiled without IPv6, this is the same as
401 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
405 pretty_print_address (const ip_address *addr)
410 return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
414 static char buf[128];
415 inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
417 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
419 /* append "%SCOPE_ID" for all ?non-global? addresses */
420 char *p = buf + strlen (buf);
422 number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
426 buf[sizeof (buf) - 1] = '\0';
435 /* Add host name HOST with the address ADDR_TEXT to the cache.
436 ADDR_LIST is a NULL-terminated list of addresses, as in struct
440 cache_host_lookup (const char *host, struct address_list *al)
442 if (!host_name_addresses_map)
443 host_name_addresses_map = make_nocase_string_hash_table (0);
446 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
452 debug_logprintf ("Caching %s =>", host);
453 for (i = 0; i < al->count; i++)
454 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
455 debug_logprintf ("\n");
460 /* Remove HOST from Wget's DNS cache. Does nothing if HOST is not in
464 forget_host_lookup (const char *host)
466 struct address_list *al = hash_table_get (host_name_addresses_map, host);
469 address_list_release (al);
470 hash_table_remove (host_name_addresses_map, host);
474 /* Look up HOST in DNS and return a list of IP addresses. The
475 addresses in the list are in the same order in which
476 gethostbyname/getaddrinfo returned them.
478 This function caches its result so that, if the same host is passed
479 the second time, the addresses are returned without DNS lookup. If
480 you want to force lookup, call forget_host_lookup() prior to this
481 function, or set opt.dns_cache to 0 to globally disable caching.
483 If SILENT is non-zero, progress messages are not printed. */
485 struct address_list *
486 lookup_host (const char *host, int silent)
488 struct address_list *al = NULL;
491 /* If we're not using getaddrinfo, first check if HOST specifies a
492 numeric IPv4 address. gethostbyname is not required to accept
493 dotted-decimal IPv4 addresses, and some implementations (e.g. the
494 Ultrix one and possibly Winsock) indeed don't. */
496 uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
497 if (addr_ipv4 != (uint32_t) -1)
499 /* No need to cache host->addr relation, just return the
502 vec[0] = (char *)&addr_ipv4;
504 return address_list_from_ipv4_addresses (vec);
509 /* Try to find the host in the cache. */
511 if (host_name_addresses_map)
513 al = hash_table_get (host_name_addresses_map, host);
516 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
522 /* No luck with the cache; resolve the host name. */
525 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
530 struct addrinfo hints, *res;
533 hints.ai_socktype = SOCK_STREAM;
534 hints.ai_family = AF_UNSPEC; /* #### should look at opt.ipv4_only
538 err = getaddrinfo_with_timeout (host, NULL, &hints, &res, opt.dns_timeout);
539 if (err != 0 || res == NULL)
542 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
543 err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
546 al = address_list_from_addrinfo (res);
550 logprintf (LOG_VERBOSE, _("failed: No IPv4/IPv6 addresses.\n"));
556 struct hostent *hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
561 if (errno != ETIMEDOUT)
562 logprintf (LOG_VERBOSE, _("failed: %s.\n"),
563 host_errstr (h_errno));
565 logputs (LOG_VERBOSE, _("failed: timed out.\n"));
569 /* Do older systems have h_addr_list? */
570 al = address_list_from_ipv4_addresses (hptr->h_addr_list);
574 /* Print the addresses determined by DNS lookup, but no more than
579 int printmax = al->count <= 3 ? al->count : 3;
580 for (i = 0; i < printmax; i++)
582 logprintf (LOG_VERBOSE, "%s",
583 pretty_print_address (al->addresses + i));
584 if (i < printmax - 1)
585 logputs (LOG_VERBOSE, ", ");
587 if (printmax != al->count)
588 logputs (LOG_VERBOSE, ", ...");
589 logputs (LOG_VERBOSE, "\n");
592 /* Cache the lookup information. */
594 cache_host_lookup (host, al);
599 /* Resolve HOST to get an address for use with bind(2). Do *not* use
600 this for sockets to be used with connect(2).
602 This is a function separate from lookup_host because the results it
603 returns are different -- it uses the AI_PASSIVE flag to
604 getaddrinfo. Because of this distinction, it doesn't store the
605 results in the cache. It prints nothing and implements no timeouts
606 because it should normally only be used with local addresses
607 (typically "localhost" or numeric addresses of different local
610 Without IPv6, this function just calls lookup_host. */
612 struct address_list *
613 lookup_host_passive (const char *host)
616 struct address_list *al = NULL;
618 struct addrinfo hints, *res;
621 hints.ai_socktype = SOCK_STREAM;
622 hints.ai_family = AF_UNSPEC; /* #### should look at opt.ipv4_only
624 hints.ai_flags = AI_PASSIVE;
626 err = getaddrinfo (host, NULL, &hints, &res);
627 if (err != 0 || res == NULL)
629 al = address_list_from_addrinfo (res);
633 return lookup_host (host, 1);
637 /* Determine whether a URL is acceptable to be followed, according to
638 a list of domains to accept. */
640 accept_domain (struct url *u)
642 assert (u->host != NULL);
645 if (!sufmatch ((const char **)opt.domains, u->host))
648 if (opt.exclude_domains)
650 if (sufmatch ((const char **)opt.exclude_domains, u->host))
656 /* Check whether WHAT is matched in LIST, each element of LIST being a
657 pattern to match WHAT against, using backward matching (see
658 match_backwards() in utils.c).
660 If an element of LIST matched, 1 is returned, 0 otherwise. */
662 sufmatch (const char **list, const char *what)
667 for (i = 0; list[i]; i++)
669 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
670 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
672 /* The domain must be first to reach the beginning. */
680 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
682 struct address_list *al;
684 xfree (key); /* host */
686 al = (struct address_list *)value;
687 assert (al->refcount == 1);
688 address_list_delete (al);
696 if (host_name_addresses_map)
698 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
699 hash_table_destroy (host_name_addresses_map);
700 host_name_addresses_map = NULL;