1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
38 # include <arpa/inet.h>
44 #define NO_ADDRESS NO_DATA
47 #ifdef HAVE_SYS_UTSNAME_H
48 # include <sys/utsname.h>
68 #define IP4_ADDRESS_LENGTH 4
70 /* Mapping between known host names and the lists of their addresses. */
72 struct hash_table *host_name_addresses_map;
74 /* Lists of addresses. This should eventually be extended to handle
78 int count; /* number of addresses */
79 unsigned char *buffer; /* buffer which holds all of them. */
81 int faulty; /* number of addresses known not to
83 int refcount; /* so we know whether to free it or
87 #define ADDR_LOCATION(al, index) ((al)->buffer + index * IP4_ADDRESS_LENGTH)
89 /* Get the bounds of the address list. */
92 address_list_get_bounds (struct address_list *al, int *start, int *end)
98 /* Copy address number INDEX to IP_STORE. */
101 address_list_copy_one (struct address_list *al, int index,
102 unsigned char *ip_store)
104 assert (index >= al->faulty && index < al->count);
105 memcpy (ip_store, ADDR_LOCATION (al, index), IP4_ADDRESS_LENGTH);
108 /* Check whether two address lists have all their IPs in common. */
111 address_list_match_all (struct address_list *al1, struct address_list *al2)
115 if (al1->count != al2->count)
117 return 0 == memcmp (al1->buffer, al2->buffer,
118 al1->count * IP4_ADDRESS_LENGTH);
121 /* Mark the INDEXth element of AL as faulty, so that the next time
122 this address list is used, the faulty element will be skipped. */
125 address_list_set_faulty (struct address_list *al, int index)
128 if (al->faulty >= al->count)
129 /* All addresses have been proven faulty. Since there's not much
130 sense in returning the user an empty address list the next
131 time, we'd rather mark them all as clean, so that they can be
136 /* Create an address_list out of a NULL-terminated list of addresses,
137 as returned by gethostbyname. */
139 static struct address_list *
140 address_list_new (char **h_addr_list)
144 struct address_list *al = xmalloc (sizeof (struct address_list));
146 while (h_addr_list[count])
151 al->buffer = xmalloc (count * IP4_ADDRESS_LENGTH);
154 for (i = 0; i < count; i++)
155 memcpy (ADDR_LOCATION (al, i), h_addr_list[i], IP4_ADDRESS_LENGTH);
160 /* Like address_list_new, but initialized with only one address. */
162 static struct address_list *
163 address_list_new_one (const char *addr)
165 struct address_list *al = xmalloc (sizeof (struct address_list));
168 al->buffer = xmalloc (IP4_ADDRESS_LENGTH);
170 memcpy (ADDR_LOCATION (al, 0), addr, IP4_ADDRESS_LENGTH);
176 address_list_delete (struct address_list *al)
183 address_list_release (struct address_list *al)
186 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
187 if (al->refcount <= 0)
189 DEBUGP (("Deleting unused %p.\n", al));
190 address_list_delete (al);
194 /* The same as inet_ntoa, but without the need for a cast, or for
195 #including the netinet stuff. */
198 pretty_print_address (const unsigned char *addr)
200 return inet_ntoa (*(struct in_addr *)addr);
203 /* Add host name HOST with the address list AL to the cache.
204 AL is expected to come from a NULL-terminated list of addresses, as in struct
208 cache_host_lookup (const char *host, struct address_list *al)
210 if (!host_name_addresses_map)
211 host_name_addresses_map = make_nocase_string_hash_table (0);
214 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
220 debug_logprintf ("Caching %s =>", host);
221 for (i = 0; i < al->count; i++)
222 debug_logprintf (" %s",
223 pretty_print_address (ADDR_LOCATION (al, i)));
224 debug_logprintf ("\n");
229 struct address_list *
230 lookup_host (const char *host, int silent)
232 struct address_list *al = NULL;
234 struct hostent *hptr;
236 /* If the address is of the form d.d.d.d, no further lookup is
238 addr = (unsigned long)inet_addr (host);
241 /* ADDR is defined to be in network byte order, which is what
242 this returns, so we can just copy it to STORE_IP. However,
243 on big endian 64-bit architectures the value will be stored
244 in the *last*, not first four bytes. OFFSET makes sure that
245 we copy the correct four bytes. */
247 #ifdef WORDS_BIGENDIAN
248 offset = sizeof (unsigned long) - IP4_ADDRESS_LENGTH;
252 return address_list_new_one ((char *)&addr + offset);
255 /* By now we know that the host name we got is not of the form
256 d.d.d.d. Try to find it in our cache of host names. */
257 if (host_name_addresses_map)
258 al = hash_table_get (host_name_addresses_map, host);
262 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
268 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
270 /* Look up the host using gethostbyname(). */
271 hptr = gethostbyname (host);
275 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
280 logprintf (LOG_VERBOSE, _("done.\n"));
282 /* Do all systems have h_addr_list, or is it a newer thing? If the
283 latter, use address_list_new_one. */
284 al = address_list_new (hptr->h_addr_list);
286 /* Cache the lookup information. */
287 cache_host_lookup (host, al);
292 /* Determine whether a URL is acceptable to be followed, according to
293 a list of domains to accept. */
295 accept_domain (struct url *u)
297 assert (u->host != NULL);
300 if (!sufmatch ((const char **)opt.domains, u->host))
303 if (opt.exclude_domains)
305 if (sufmatch ((const char **)opt.exclude_domains, u->host))
311 /* Check whether WHAT is matched in LIST, each element of LIST being a
312 pattern to match WHAT against, using backward matching (see
313 match_backwards() in utils.c).
315 If an element of LIST matched, 1 is returned, 0 otherwise. */
317 sufmatch (const char **list, const char *what)
322 for (i = 0; list[i]; i++)
324 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
325 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
327 /* The domain pattern must be the first to reach its beginning. */
334 /* Print error messages for host errors. */
338 /* Can't use switch since some constants are equal (at least on my
339 system), and the compiler signals "duplicate case value". */
340 if (error == HOST_NOT_FOUND
341 || error == NO_RECOVERY
343 || error == NO_ADDRESS
344 || error == TRY_AGAIN)
345 return _("Host not found");
347 return _("Unknown error");
351 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
353 struct address_list *al;
355 xfree (key); /* host */
357 al = (struct address_list *)value;
358 assert (al->refcount == 1);
359 address_list_delete (al);
367 if (host_name_addresses_map)
369 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
370 hash_table_destroy (host_name_addresses_map);
371 host_name_addresses_map = NULL;