1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
38 # include <arpa/inet.h>
44 #define NO_ADDRESS NO_DATA
47 #ifdef HAVE_SYS_UTSNAME_H
48 # include <sys/utsname.h>
62 /* Mapping from all known hosts to their addresses (n.n.n.n). */
64 /* #### We should map to *lists* of IP addresses. */
66 struct hash_table *host_name_address_map;
68 /* The following two tables are obsolete, since we no longer do host
71 /* Mapping from all known addresses (n.n.n.n) to their hosts. This
72 is the inverse of host_name_address_map. These two tables share
73 the strdup'ed strings. */
74 struct hash_table *host_address_name_map;
76 /* Mapping between auxiliary (slave) and master host names. */
77 struct hash_table *host_slave_master_map;
79 /* The same as gethostbyname, but supports internet addresses of the
80 form `N.N.N.N'. On some systems gethostbyname() knows how to do
81 this automatically. */
83 ngethostbyname (const char *name)
88 addr = (unsigned long)inet_addr (name);
90 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
92 hp = gethostbyname (name);
96 /* Add host name HOST with the address ADDR_TEXT to the cache.
97 Normally this means that the (HOST, ADDR_TEXT) pair will be added to
98 host_name_address_map and to host_address_name_map. (It is the
99 caller's responsibility to make sure that HOST is not already in
100 host_name_address_map.)
102 If the ADDR_TEXT has already been seen and belongs to another host,
103 HOST will be added to host_slave_master_map instead. */
106 add_host_to_cache (const char *host, const char *addr_text)
108 char *canonical_name = hash_table_get (host_address_name_map, addr_text);
111 DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
112 host, canonical_name));
113 /* We've already dealt with that host under another name. */
114 hash_table_put (host_slave_master_map,
115 xstrdup_lower (host),
116 xstrdup_lower (canonical_name));
120 /* This is really the first time we're dealing with that host. */
121 char *h_copy = xstrdup_lower (host);
122 char *a_copy = xstrdup (addr_text);
123 DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
124 hash_table_put (host_name_address_map, h_copy, a_copy);
125 hash_table_put (host_address_name_map, a_copy, h_copy);
129 /* Store the address of HOSTNAME, internet-style (four octets in
130 network order), to WHERE. First try to get the address from the
131 cache; if it is not available, call the DNS functions and update
134 Return 1 on successful finding of the hostname, 0 otherwise. */
136 store_hostaddress (unsigned char *where, const char *hostname)
140 char *canonical_name;
141 struct hostent *hptr;
145 /* If the address is of the form d.d.d.d, there will be no trouble
147 addr = (unsigned long)inet_addr (hostname);
148 /* If we have the numeric address, just store it. */
151 /* ADDR is defined to be in network byte order, meaning the code
152 works on little and big endian 32-bit architectures without
153 change. On big endian 64-bit architectures we need to be
154 careful to copy the correct four bytes. */
157 #ifdef WORDS_BIGENDIAN
158 offset = sizeof (unsigned long) - 4;
162 memcpy (where, (char *)&addr + offset, 4);
166 /* By now we know that the address is not of the form d.d.d.d. Try
167 to find it in our cache of host addresses. */
168 addr_text = hash_table_get (host_name_address_map, hostname);
171 DEBUGP (("Found %s in host_name_address_map: %s\n",
172 hostname, addr_text));
173 addr = (unsigned long)inet_addr (addr_text);
177 /* Maybe this host is known to us under another name. If so, we'll
178 find it in host_slave_master_map, and use the master name to find
179 its address in host_name_address_map. */
180 canonical_name = hash_table_get (host_slave_master_map, hostname);
183 addr_text = hash_table_get (host_name_address_map, canonical_name);
184 assert (addr_text != NULL);
185 DEBUGP (("Found %s as slave of %s -> %s\n",
186 hostname, canonical_name, addr_text));
187 addr = (unsigned long)inet_addr (addr_text);
191 /* Since all else has failed, let's try gethostbyname(). Note that
192 we use gethostbyname() rather than ngethostbyname(), because we
193 already know that the address is not numerical. */
194 hptr = gethostbyname (hostname);
197 /* Copy the address of the host to socket description. */
198 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
199 assert (hptr->h_length == 4);
201 /* Now that we've gone through the trouble of calling
202 gethostbyname(), we can store this valuable information to the
203 cache. First, we have to look for it by address to know if it's
204 already in the cache by another name. */
205 /* Originally, we copied to in.s_addr, but it appears to be missing
207 memcpy (&in, *hptr->h_addr_list, sizeof (in));
208 inet_s = inet_ntoa (in);
209 add_host_to_cache (hostname, inet_s);
213 /* Determine whether a URL is acceptable to be followed, according to
214 a list of domains to accept. */
216 accept_domain (struct url *u)
218 assert (u->host != NULL);
221 if (!sufmatch ((const char **)opt.domains, u->host))
224 if (opt.exclude_domains)
226 if (sufmatch ((const char **)opt.exclude_domains, u->host))
232 /* Check whether WHAT is matched in LIST, each element of LIST being a
233 pattern to match WHAT against, using backward matching (see
234 match_backwards() in utils.c).
236 If an element of LIST matched, 1 is returned, 0 otherwise. */
238 sufmatch (const char **list, const char *what)
243 for (i = 0; list[i]; i++)
245 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
246 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
248 /* The domain must be the first to reach the beginning. */
255 /* Print error messages for host errors. */
259 /* Can't use switch since some constants are equal (at least on my
260 system), and the compiler signals "duplicate case value". */
261 if (error == HOST_NOT_FOUND
262 || error == NO_RECOVERY
264 || error == NO_ADDRESS
265 || error == TRY_AGAIN)
266 return _("Host not found");
268 return _("Unknown error");
274 /* host_name_address_map and host_address_name_map share the
275 strings. Because of that, calling free_keys_and_values once
276 suffices for both. */
277 free_keys_and_values (host_name_address_map);
278 hash_table_destroy (host_name_address_map);
279 hash_table_destroy (host_address_name_map);
280 free_keys_and_values (host_slave_master_map);
281 hash_table_destroy (host_slave_master_map);
287 host_name_address_map = make_string_hash_table (0);
288 host_address_name_map = make_string_hash_table (0);
289 host_slave_master_map = make_string_hash_table (0);