1 /* Host name resolution and matching.
2 Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
38 # include <arpa/inet.h>
44 #define NO_ADDRESS NO_DATA
47 #ifdef HAVE_SYS_UTSNAME_H
48 # include <sys/utsname.h>
62 #define IP4_ADDRESS_LENGTH 4
64 /* Mapping between known host names and the lists of their addresses. */
66 struct hash_table *host_name_addresses_map;
68 /* Lists of addresses. This should eventually be extended to handle
72 int count; /* number of addresses */
73 unsigned char *buffer; /* buffer which holds all of them. */
75 int faulty; /* number of addresses known not to
77 int refcount; /* so we know whether to free it or
/* Yield a pointer to the first byte of address number INDEX inside
   AL's buffer, where every address occupies IP4_ADDRESS_LENGTH bytes.
   Both macro arguments are parenthesized so that a compound
   expression passed as INDEX (for example `i + 1') is scaled as a
   whole instead of having only its last operand multiplied.  */
#define ADDR_LOCATION(al, index) ((al)->buffer + (index) * IP4_ADDRESS_LENGTH)
83 /* Get the bounds of the address list. */
86 address_list_get_bounds (struct address_list *al, int *start, int *end)
92 /* Copy address number INDEX of AL to IP_STORE.  INDEX must satisfy
   al->faulty <= INDEX < al->count (checked with assert), and exactly
   IP4_ADDRESS_LENGTH bytes are written to IP_STORE.  */
95 address_list_copy_one (struct address_list *al, int index,
96 unsigned char *ip_store)
98 assert (index >= al->faulty && index < al->count);
99 memcpy (ip_store, ADDR_LOCATION (al, index), IP4_ADDRESS_LENGTH);
102 /* Check whether two address lists have all their IPs in common.
   The counts are compared first; the final result is a byte-for-byte
   memcmp of the two buffers over count * IP4_ADDRESS_LENGTH bytes, so
   the addresses must also appear in the same order to match.  */
105 address_list_match_all (struct address_list *al1, struct address_list *al2)
109 if (al1->count != al2->count)
111 return 0 == memcmp (al1->buffer, al2->buffer,
112 al1->count * IP4_ADDRESS_LENGTH);
115 /* Mark the INDEXth element of AL as faulty, so that the next time
116 this address list is used, the faulty element will be skipped. */
119 address_list_set_faulty (struct address_list *al, int index)
122 if (al->faulty >= al->count)
123 /* All addresses have been proven faulty. Since there's not much
124 sense in returning the user an empty address list the next
125 time, we'll rather make them all clean, so that they can be
130 /* Create an address_list out of a NULL-terminated list of addresses,
131 as returned by gethostbyname.  The entries are copied into one
   buffer obtained from xmalloc, IP4_ADDRESS_LENGTH bytes per entry,
   so every element of H_ADDR_LIST must point to at least that many
   bytes.  */
133 static struct address_list *
134 address_list_new (char **h_addr_list)
138 struct address_list *al = xmalloc (sizeof (struct address_list));
/* Walk H_ADDR_LIST up to its terminating NULL entry.  */
140 while (h_addr_list[count])
145 al->buffer = xmalloc (count * IP4_ADDRESS_LENGTH);
/* Pack the addresses back to back into AL's buffer.  */
148 for (i = 0; i < count; i++)
149 memcpy (ADDR_LOCATION (al, i), h_addr_list[i], IP4_ADDRESS_LENGTH);
155 address_list_delete (struct address_list *al)
/* Log AL's reference count and, once that count is zero or below,
   destroy the list with address_list_delete.  */
162 address_list_release (struct address_list *al)
165 DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
166 if (al->refcount <= 0)
168 DEBUGP (("Deleting unused %p.\n", al));
169 address_list_delete (al);
173 /* The same as inet_ntoa, but without the need for a cast, or for
174 #including the netinet stuff.  ADDR is reinterpreted as a
   struct in_addr, and the returned string is whatever inet_ntoa
   returns (per its documentation, a statically allocated buffer that
   later calls overwrite -- copy it if it must persist).  */
177 pretty_print_address (const unsigned char *addr)
179 return inet_ntoa (*(struct in_addr *)addr);
182 /* Add host name HOST with the address ADDR_TEXT to the cache.
183 ADDR_LIST is a NULL-terminated list of addresses, as in struct
187 cache_host_lookup (const char *host, struct address_list *al)
/* Create the cache table on first use.  */
189 if (!host_name_addresses_map)
190 host_name_addresses_map = make_nocase_string_hash_table (0);
/* The table receives xstrdup_lower (host) as the key rather than HOST
   itself, with AL as the value.  */
193 hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
/* Debug trace: the host name followed by every address in AL.  */
199 debug_logprintf ("Caching %s =>", host);
200 for (i = 0; i < al->count; i++)
201 debug_logprintf (" %s",
202 pretty_print_address (ADDR_LOCATION (al, i)));
203 debug_logprintf ("\n");
/* Return the list of addresses for HOST.  The steps visible here are:
   HOST is first handed to inet_addr, and the numeric case produces a
   fresh single-entry list via address_list_new; otherwise the
   host_name_addresses_map cache is consulted; failing that,
   gethostbyname is called and its h_addr_list is converted with
   address_list_new and stored with cache_host_lookup.
   NOTE(review): the conditions guarding each step, the role of
   SILENT, and the value returned on failure are not visible in this
   excerpt -- confirm against the full function.  */
208 struct address_list *
209 lookup_host (const char *host, int silent)
211 struct address_list *al = NULL;
213 struct hostent *hptr;
215 /* If the address is of the form d.d.d.d, no further lookup is
217 addr = (unsigned long)inet_addr (host);
220 char tmpstore[IP4_ADDRESS_LENGTH];
221 char *lst[] = { tmpstore, NULL };
223 /* ADDR is defined to be in network byte order, which is what
224 this returns, so we can just copy it to TMPSTORE. However,
225 on big endian 64-bit architectures the value will be stored
226 in the *last*, not first four bytes. OFFSET makes sure that
227 we copy the correct four bytes. */
229 #ifdef WORDS_BIGENDIAN
230 offset = sizeof (unsigned long) - IP4_ADDRESS_LENGTH;
234 memcpy (tmpstore, (char *)&addr + offset, IP4_ADDRESS_LENGTH);
235 return address_list_new (lst);
238 /* By now we know that the host name we got is not of the form
239 d.d.d.d. Try to find it in our cache of host names. */
240 if (host_name_addresses_map)
241 al = hash_table_get (host_name_addresses_map, host);
245 DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
251 logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
253 /* Look up the host using gethostbyname(). */
254 hptr = gethostbyname (host);
258 logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
263 logprintf (LOG_VERBOSE, _("done.\n"));
265 al = address_list_new (hptr->h_addr_list);
267 /* Cache the lookup information. */
268 cache_host_lookup (host, al);
273 /* Determine whether a URL is acceptable to be followed, according to
274 a list of domains to accept.  U->host must be non-NULL (asserted).
   The host is tested with sufmatch against opt.domains and, when
   opt.exclude_domains is set, against that list as well.  */
276 accept_domain (struct url *u)
278 assert (u->host != NULL);
281 if (!sufmatch ((const char **)opt.domains, u->host))
284 if (opt.exclude_domains)
286 if (sufmatch ((const char **)opt.exclude_domains, u->host))
292 /* Check whether WHAT is matched in LIST, each element of LIST being a
293 pattern to match WHAT against, using backward matching (see
294 match_backwards() in utils.c).  LIST is terminated by a NULL
   entry, and characters are compared case-insensitively (TOLOWER is
   applied to both sides).
296 If an element of LIST matched, 1 is returned, 0 otherwise. */
298 sufmatch (const char **list, const char *what)
303 for (i = 0; list[i]; i++)
/* J starts at the pattern's terminating NUL and K at LW; both indices
   then move towards the front of their strings together.  */
305 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
306 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
308 /* The domain (pattern) must be the first to reach its beginning. */
315 /* Return the message text for host lookup error code ERROR: the
   "Host not found" string for any of the codes tested below, the
   "Unknown error" string otherwise.  Both strings are passed through
   _() before being returned; nothing is printed here.  */
319 /* Can't use switch since some constants are equal (at least on my
320 system), and the compiler signals "duplicate case value". */
321 if (error == HOST_NOT_FOUND
322 || error == NO_RECOVERY
324 || error == NO_ADDRESS
325 || error == TRY_AGAIN)
326 return _("Host not found");
328 return _("Unknown error");
/* Callback handed to hash_table_map when the host cache is torn down:
   frees the KEY string and deletes the address list stored as VALUE.
   The list is expected to hold exactly one reference at that point
   (enforced with assert).  The third argument is not used by the
   statements below.  */
332 host_cleanup_mapper (void *key, void *value, void *arg_ignored)
334 struct address_list *al;
336 xfree (key); /* host */
338 al = (struct address_list *)value;
339 assert (al->refcount == 1);
340 address_list_delete (al);
348 if (host_name_addresses_map)
350 hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
351 hash_table_destroy (host_name_addresses_map);
352 host_name_addresses_map = NULL;