1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
39 # include <arpa/inet.h>
45 #define NO_ADDRESS NO_DATA
48 #ifdef HAVE_SYS_UTSNAME_H
49 # include <sys/utsname.h>
70 int ip_default_family = AF_INET6;
72 int ip_default_family = AF_INET;
75 /* Mapping between known host names and the lists of their addresses.
   The table is created on first use by cache_host_lookup with
   make_nocase_string_hash_table; keys are lowercased copies of the host
   name (xstrdup_lower) and values are struct address_list pointers. */
77 static struct hash_table *host_name_addresses_map;
79 /* Lists of addresses. This should eventually be extended to handle
83 int count; /* number of addresses */
84 ip_address *addresses; /* pointer to the array of addresses (one ip_address per entry) */
86 int faulty; /* number of addresses known not to work; address_list_copy_one rejects indexes below this. */
87 int refcount; /* so we know whether to free it or not. */
90 /* Get the bounds of the address list AL; the index range is handed back
   through the START and END pointers.
   NOTE(review): the body is not visible here.  Presumably the range is
   [al->faulty, al->count), matching the index check done in
   address_list_copy_one; confirm against the full file. */
93 address_list_get_bounds (struct address_list *al, int *start, int *end)
99 /* Copy address number INDEX of AL to IP_STORE.
   INDEX must satisfy al->faulty <= INDEX < al->count and IP_STORE must
   be non-NULL.  Both conditions are checked with assert only, so they
   are not enforced when assertions are compiled out. */
102 address_list_copy_one (struct address_list *al, int index, ip_address *ip_store)
104 assert (index >= al->faulty && index < al->count && ip_store!=NULL );
105 memcpy (ip_store, al->addresses + index, sizeof (ip_address));
108 /* Check whether two address lists have all their IPs in common.
   Lists whose counts differ are handled by the count check (the
   statement it guards is not visible here; presumably it reports "no
   match").  Otherwise the two address arrays are compared bytewise with
   memcmp, so the lists only match when they hold the same addresses in
   the same order.  The result is non-zero when the arrays are
   identical. */
111 address_list_match_all (struct address_list *al1, struct address_list *al2)
115 if (al1->count != al2->count)
117 return 0 == memcmp (al1->addresses, al2->addresses,
118 al1->count * sizeof (ip_address));
121 /* Mark the INDEXth element of AL as faulty, so that the next time
122 this address list is used, the faulty element will be skipped.
   INDEX must equal the current value of al->faulty; this is asserted
   below. */
125 address_list_set_faulty (struct address_list *al, int index)
127 /* We assume that the address list is traversed in order, so that a
128 "faulty" attempt is always preceded with all-faulty addresses,
129 and this is how Wget uses it. */
130 assert (index == al->faulty);
133 if (al->faulty >= al->count)
134 /* All addresses have been proven faulty. Since there's not much
135 sense in returning the user an empty address list the next
136 time, we'll rather make them all clean, so that they can be
143 * address_list_from_addrinfo
145 * This function transforms an addrinfo linked list into an address_list.
148 * addrinfo* Linked list of addrinfo
151 * address_list* Newly allocated address_list
153 static struct address_list *
154 address_list_from_addrinfo (struct addrinfo *ai)
156 struct address_list *al;
157 struct addrinfo *ai_head = ai;
/* First pass over the list: pick out the AF_INET and AF_INET6 entries.
   The counter update is not visible here; cnt sizes the array
   allocated below. */
161 for (ai = ai_head; ai; ai = ai->ai_next)
162 if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6)
167 al = xmalloc (sizeof (struct address_list));
168 al->addresses = xmalloc (cnt * sizeof (ip_address));
/* Second pass: walk the list again from its head and store each
   supported address in al->addresses. */
173 for (i = 0, ai = ai_head; ai; ai = ai->ai_next)
174 if (ai->ai_family == AF_INET6)
/* IPv6 entry: the 16 address bytes are copied as they are. */
176 struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)ai->ai_addr;
177 memcpy (al->addresses + i, &sin6->sin6_addr, 16);
180 else if (ai->ai_family == AF_INET)
/* IPv4 entry: converted to the internal format by map_ipv4_to_ip. */
182 struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr;
183 map_ipv4_to_ip ((ip4_address *)&sin->sin_addr, al->addresses + i);
190 /* Create an address_list out of a NULL-terminated list of addresses,
191 as returned by gethostbyname.  Every entry of H_ADDR_LIST is read as
   an IPv4 address and stored in the new list via map_ipv4_to_ip.  The
   list structure and its address array are both allocated with
   xmalloc. */
192 static struct address_list *
193 address_list_new (char **h_addr_list)
197 struct address_list *al = xmalloc (sizeof (struct address_list));
199 while (h_addr_list[count])
204 al->addresses = xmalloc (count * sizeof (ip_address));
207 for (i = 0; i < count; i++)
208 map_ipv4_to_ip ((ip4_address *)h_addr_list[i], al->addresses + i);
214 /* Like address_list_new, but initialized with only one address.
   The address is copied out of *ADDR into storage obtained from
   xmalloc, so the new list does not keep a reference to ADDR. */
216 static struct address_list *
217 address_list_new_one (ip_address *addr)
219 struct address_list *al = xmalloc (sizeof (struct address_list));
222 al->addresses = xmalloc (sizeof (ip_address));
224 memcpy (al->addresses, addr, sizeof (ip_address));
230 address_list_delete (struct address_list *al)
/* Free the storage owned by AL.  The address array is released with
   xfree; the remaining statements of the body are not visible here. */
232 xfree (al->addresses);
237 address_list_release (struct address_list *al)
/* Give up one reference to AL.  The reference-count update itself is
   not visible here (presumably a decrement just before the first debug
   message, which reports the "new refcount").  Once al->refcount is
   zero or negative the list is destroyed with address_list_delete. */
240 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
241 if (al->refcount <= 0)
243 DEBUGP (("Deleting unused %p.\n", al));
244 address_list_delete (al);
249 * wget_sockaddr_set_address
251 * This function takes a wget_sockaddr and fills in the protocol family,
252 * the port number and the address, where NULL in address means wildcard.
253 * An unsupported address family will abort the whole program.
256 * wget_sockaddr* The space to be filled
257 * int The desired protocol family
258 * unsigned short The port
259 * const ip_address The binary IP address
262 * - Only modifies the 1st parameter
265 wget_sockaddr_set_address (wget_sockaddr *sa,
266 int ip_family, unsigned short port, ip_address *addr)
/* IPv4: fill in sa->sin.  ADDR is first converted to an IPv4 address
   with map_ip_to_ipv4; the port is stored in network byte order. */
268 if (ip_family == AF_INET)
271 if (!map_ip_to_ipv4 (addr, &addr4))
272 /* should the callers have prevented this? */
274 sa->sin.sin_family = ip_family;
275 sa->sin.sin_port = htons (port);
/* All-zero address versus the converted address.  The condition that
   selects between the two is not visible here; presumably the zero
   fill is the wildcard case. */
277 memset (&sa->sin.sin_addr, 0, sizeof(ip4_address));
279 memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address));
/* IPv6: fill in sa->sin6.  The address is taken as the 16 raw bytes of
   ADDR, or zero-filled (selecting condition not visible here). */
283 if (ip_family == AF_INET6)
285 sa->sin6.sin6_family = ip_family;
286 sa->sin6.sin6_port = htons (port);
288 memset (&sa->sin6.sin6_addr, 0 , 16);
290 memcpy (&sa->sin6.sin6_addr, addr, 16);
298 * wget_sockaddr_set_port
300 * This function only fills in the port of the socket information.
301 * If the protocol is not supported nothing is done.
302 * An unsupported address family will abort the whole program.
305 * that the IP-Protocol already is set.
308 * wget_sockaddr* The space where the port should be entered
309 * unsigned short The port that should be entered in host order
312 * - Only modifies the 1st parameter
315 wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port)
/* The family already stored in SA decides which port field is written.
   PORT is converted to network byte order with htons in both cases. */
317 if (sa->sa.sa_family == AF_INET)
319 sa->sin.sin_port = htons (port);
323 if (sa->sa.sa_family == AF_INET6)
325 sa->sin6.sin6_port = htons (port);
333 * wget_sockaddr_get_addr
335 * This function returns the address from a sockaddr as a byte string.
336 * An unsupported address family will abort the whole program.
339 * that the IP-Protocol already is set.
342 * wget_sockaddr* Socket Information
345 * unsigned char * IP address as byte string.
348 wget_sockaddr_get_addr (wget_sockaddr *sa)
/* The returned pointer refers to a member of *SA itself (sin_addr for
   AF_INET, sin6_addr for AF_INET6); nothing is copied, so it is only
   usable while *SA is. */
350 if (sa->sa.sa_family == AF_INET)
351 return &sa->sin.sin_addr;
353 if (sa->sa.sa_family == AF_INET6)
354 return &sa->sin6.sin6_addr;
362 * wget_sockaddr_get_port
364 * This function only returns the port from the input structure.
365 * An unsupported address family will abort the whole program.
368 * that the IP-Protocol already is set.
371 * wget_sockaddr* Information where to get the port
374 * unsigned short Port Number in host order.
377 wget_sockaddr_get_port (const wget_sockaddr *sa)
/* The port fields are written in network byte order by
   wget_sockaddr_set_port and wget_sockaddr_set_address; the htons calls
   below swap them back to host order.
   NOTE(review): ntohs is the conventional name for the network-to-host
   direction.  The two perform the same byte swap on common platforms,
   but consider switching for clarity. */
379 if (sa->sa.sa_family == AF_INET)
380 return htons (sa->sin.sin_port);
382 if (sa->sa.sa_family == AF_INET6)
383 return htons (sa->sin6.sin6_port);
386 /* do not complain about return nothing */
393 * This function returns the length of the sockaddr corresponding to
394 * the currently preferred protocol for (bind, connect etc...)
395 * An unsupported address family will abort the whole program.
398 * that the IP-Protocol already is set.
401 * - Public IP-Family Information
404 * int structure length for socket options
/* The size is chosen from the global ip_default_family, not from any
   particular socket address. */
409 if (ip_default_family == AF_INET)
410 return sizeof (struct sockaddr_in);
412 if (ip_default_family == AF_INET6)
413 return sizeof (struct sockaddr_in6);
416 /* do not complain about return nothing */
421 * Map an IPv4 address to the internal address format.
424 map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip)
/* 16-byte internal format: the 12-byte prefix 00 .. 00 ff ff followed
   by the 4 IPv4 bytes, i.e. the IPv4-mapped IPv6 address layout.  The
   IPv4 bytes are written at offset 12 first, then the prefix at
   offset 0. */
427 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
428 memcpy ((char *)ip + 12, ipv4 , 4);
429 memcpy ((char *)ip + 0, ipv64, 12);
/* Alternative where the internal format is just the 4 IPv4 bytes
   (presumably the build without IPv6; the preprocessor lines selecting
   between the two variants are not visible here).  The copy is skipped
   when IP and IPV4 point at the same object. */
431 if ((char *)ip != (char *)ipv4)
432 memcpy (ip, ipv4, 4);
436 /* Detect whether an IP address represents an IPv4 address and, if so,
437 copy it to IPV4. 0 is returned on failure.
438 This operation always succeeds when Wget is compiled without IPv6.
439 If IPV4 is NULL, don't copy, just detect. */
442 map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4)
/* 16-byte internal format: IP only counts as IPv4 when its first 12
   bytes equal the 00 .. 00 ff ff prefix; the IPv4 address is then the
   last 4 bytes. */
445 static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
446 if (0 != memcmp (ip, ipv64, 12))
449 memcpy (ipv4, (char *)ip + 12, 4);
/* Alternative where IP already holds just the 4 IPv4 bytes (presumably
   the build without IPv6; the preprocessor lines are not visible
   here). */
452 memcpy (ipv4, (char *)ip, 4);
457 /* Pretty-print ADDR. NOTE(review): the buffer declared in this function
   is static and inet_ntoa returns a pointer to static storage, so callers
   should presumably treat the result as overwritten by the next call.
   When compiled without IPv6, this is the same as
458 inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
462 pretty_print_address (ip_address *addr)
466 static char buf[128];
468 if (map_ip_to_ipv4 (addr, &addr4))
469 return inet_ntoa (*(struct in_addr *)&addr4);
471 if (!inet_ntop (AF_INET6, addr, buf, sizeof (buf)))
475 return inet_ntoa (*(struct in_addr *)addr);
478 /* Add host name HOST with the address list AL to the cache.  The key
   stored in host_name_addresses_map is a lowercased copy of HOST made
   with xstrdup_lower, and the table is created on first use.  The
   cached mapping is also written to the debug log, one pretty-printed
   address per entry of AL.
   NOTE(review): the function takes AL (a struct address_list); the
   ADDR_TEXT / ADDR_LIST wording of the older description below looks
   stale.
479 ADDR_LIST is a NULL-terminated list of addresses, as in struct
483 cache_host_lookup (const char *host, struct address_list *al)
485 if (!host_name_addresses_map)
486 host_name_addresses_map = make_nocase_string_hash_table (0);
489 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
495 debug_logprintf ("Caching %s =>", host);
496 for (i = 0; i < al->count; i++)
497 debug_logprintf (" %s", pretty_print_address (al->addresses + i));
498 debug_logprintf ("\n");
503 struct address_list *
504 lookup_host (const char *host, int silent)
506 struct address_list *al = NULL;
507 unsigned long addr_ipv4; /* #### use a 32-bit type here. */
510 /* Overview of lookup_host: HOST is tried as a numeric address first
   (inet_pton for IPv6, inet_addr for IPv4), in which case a one-element
   list is returned.  Otherwise the host cache is consulted, and on a
   miss the name is resolved with getaddrinfo or gethostbyname and the
   result is cached with cache_host_lookup.
   NOTE(review): the uses of SILENT and the failure returns are not
   visible here; presumably SILENT suppresses the progress messages and
   a failed resolution yields NULL.

   First, try to check whether the address is already a numeric
514 if (inet_pton (AF_INET6, host, &addr) > 0)
515 return address_list_new_one (&addr);
518 addr_ipv4 = (unsigned long)inet_addr (host);
519 if ((int)addr_ipv4 != -1)
521 /* ADDR is defined to be in network byte order, which is what
522 this returns, so we can just copy it to STORE_IP. However,
523 on big endian 64-bit architectures the value will be stored
524 in the *last*, not first four bytes. OFFSET makes sure that
525 we copy the correct four bytes.
   NOTE(review): inet_addr reports failure with the same value that
   the valid address 255.255.255.255 converts to, so that address is
   not recognized as numeric by the test above. */
527 #ifdef WORDS_BIGENDIAN
528 offset = sizeof (unsigned long) - sizeof (ip4_address);
530 map_ipv4_to_ip ((ip4_address *)((char *)&addr_ipv4 + offset), &addr);
531 return address_list_new_one (&addr);
/* Not a numeric address: consult the host cache, if one has been
   created. */
534 if (host_name_addresses_map)
536 al = hash_table_get (host_name_addresses_map, host);
540 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
547 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
/* Resolver path using getaddrinfo.  The query asks for stream sockets,
   and is restricted to AF_INET when ip_default_family is AF_INET. */
551 struct addrinfo hints, *ai;
554 memset (&hints, 0, sizeof (hints));
555 if (ip_default_family == AF_INET)
556 hints.ai_family = AF_INET;
558 hints.ai_family = PF_UNSPEC;
559 hints.ai_socktype = SOCK_STREAM;
560 err = getaddrinfo (host, NULL, &hints, &ai);
562 if (err != 0 || ai == NULL)
565 logprintf (LOG_VERBOSE, _("failed: %s.\n"), gai_strerror (err));
568 al = address_list_from_addrinfo (ai);
/* Resolver path using gethostbyname (the preprocessor lines selecting
   between the two paths are not visible here). */
573 struct hostent *hptr = gethostbyname (host);
577 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
580 /* Do all systems have h_addr_list, or is it a newer thing? If
581 the latter, use address_list_new_one. */
582 al = address_list_new (hptr->h_addr_list);
587 logprintf (LOG_VERBOSE, _("done.\n"));
589 /* Cache the lookup information. */
590 cache_host_lookup (host, al);
595 /* Determine whether a URL is acceptable to be followed, according to
596 a list of domains to accept.  U->host must be non-NULL (asserted).
   The host is suffix-matched with sufmatch against opt.domains and,
   when opt.exclude_domains is set, against that list as well.  The
   return statements are not visible here; presumably a host missing
   from opt.domains or present in opt.exclude_domains is rejected. */
598 accept_domain (struct url *u)
600 assert (u->host != NULL);
603 if (!sufmatch ((const char **)opt.domains, u->host))
606 if (opt.exclude_domains)
608 if (sufmatch ((const char **)opt.exclude_domains, u->host))
614 /* Check whether WHAT is matched in LIST, each element of LIST being a
614 /* Check whether WHAT is matched in LIST, each element of LIST being a
615 pattern to match WHAT against, using backward matching (see
616 match_backwards() in utils.c).
   LIST is terminated by a NULL element.  Each pattern is compared with
   WHAT case-insensitively (TOLOWER), starting at the end of both
   strings and moving toward their beginnings.
618 If an element of LIST matched, 1 is returned, 0 otherwise. */
620 sufmatch (const char **list, const char *what)
625 for (i = 0; list[i]; i++)
627 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
628 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
630 /* The domain must be first to reach to beginning. */
637 /* Return the error message for the host error code ERROR.  The message
   is returned as a string passed through _(), not printed: "Host not
   found" for the codes tested below, "Unknown error" for anything
   else. */
641 /* Can't use switch since some constants are equal (at least on my
642 system), and the compiler signals "duplicate case value". */
643 if (error == HOST_NOT_FOUND
644 || error == NO_RECOVERY
646 || error == NO_ADDRESS
647 || error == TRY_AGAIN)
648 return _("Host not found");
650 return _("Unknown error");
654 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
/* Callback handed to hash_table_map when the host cache is torn down.
   It frees the key (the host name string) and deletes the address list
   stored as the value.  Every cached list is expected to hold exactly
   one reference at this point (asserted).  ARG_IGNORED is not used in
   the visible code. */
656 struct address_list *al;
658 xfree (key); /* host */
660 al = (struct address_list *)value;
661 assert (al->refcount == 1);
662 address_list_delete (al);
/* Tear down the host cache if it exists: every entry is released
   through host_cleanup_mapper, the table itself is destroyed, and the
   pointer is reset to NULL so that cache_host_lookup creates a fresh
   table on its next call. */
670 if (host_name_addresses_map)
672 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
673 hash_table_destroy (host_name_addresses_map);
674 host_name_addresses_map = NULL;