1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
38 # include <arpa/inet.h>
44 #define NO_ADDRESS NO_DATA
47 #ifdef HAVE_SYS_UTSNAME_H
48 # include <sys/utsname.h>
62 /* Mapping between all known hosts to their addresses (n.n.n.n). */
63 struct hash_table *host_name_address_map;
65 /* Mapping between all known addresses (n.n.n.n) to their hosts. This
66 is the inverse of host_name_address_map. These two tables share
67 the strdup'ed strings. */
68 struct hash_table *host_address_name_map;
70 /* Mapping between auxiliary (slave) and master host names. */
71 struct hash_table *host_slave_master_map;
73 /* Utility function: like xstrdup(), but also lowercases the copy.
S itself is const and is only read; the work is done on the duplicate
obtained from xstrdup().  (The lowercasing loop and the return
statement are not visible in this listing -- TODO confirm that the
duplicate is what gets returned.) */
76 xstrdup_lower (const char *s)
78 char *copy = xstrdup (s);
85 /* The same as gethostbyname, but supports internet addresses of the
86 form `N.N.N.N'. On some systems gethostbyname() knows how to do
87 this automatically.

NAME is first run through inet_addr(); the visible code then resolves
it either with gethostbyaddr() on the parsed address or with
gethostbyname() on the original string. */
89 ngethostbyname (const char *name)
94 addr = (unsigned long)inet_addr (name);
/* NOTE(review): the length passed below is sizeof (addr).  If ADDR is
declared unsigned long (its declaration is not visible here -- TODO
confirm), that is 8 bytes on LP64 systems, whereas an AF_INET address
is 4 bytes. */
96 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
98 hp = gethostbyname (name);
102 /* Add host name HOST with the address ADDR_TEXT to the cache.
103 Normally this means that the (HOST, ADDR_TEXT) pair will be added to
104 host_name_address_map and to host_address_name_map. (It is the
105 caller's responsibility to make sure that HOST is not already in
106 host_name_address_map.)
108 If the ADDR_TEXT has already been seen and belongs to another host,
109 HOST will be added to host_slave_master_map instead.

Neither HOST nor ADDR_TEXT is stored directly: every string placed in
a table below is a duplicate made with xstrdup() or xstrdup_lower(). */
112 add_host_to_cache (const char *host, const char *addr_text)
/* Look the address up first: a hit means some other name already owns
it. */
114 char *canonical_name = hash_table_get (host_address_name_map, addr_text);
117 DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
118 host, canonical_name));
119 /* We've already dealt with that host under another name. */
/* Key and value are both fresh lowercased copies. */
120 hash_table_put (host_slave_master_map,
121 xstrdup_lower (host),
122 xstrdup_lower (canonical_name));
126 /* This is really the first time we're dealing with that host. */
127 char *h_copy = xstrdup_lower (host);
128 char *a_copy = xstrdup (addr_text);
129 DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
/* The same two allocations are entered in both tables, with the key
and value roles swapped. */
130 hash_table_put (host_name_address_map, h_copy, a_copy);
131 hash_table_put (host_address_name_map, a_copy, h_copy);
135 /* Store the address of HOSTNAME, internet-style (four octets in
136 network order), to WHERE. First try to get the address from the
137 cache; if it is not available, call the DNS functions and update
the cache.

140 Return 1 on successful finding of the hostname, 0 otherwise. */
142 store_hostaddress (unsigned char *where, const char *hostname)
146 char *canonical_name;
147 struct hostent *hptr;
151 /* If the address is of the form d.d.d.d, there will be no trouble with it. */
153 addr = (unsigned long)inet_addr (hostname);
154 /* If we have the numeric address, just store it. */
157 /* ADDR is defined to be in network byte order, meaning the code
158 works on little and big endian 32-bit architectures without
159 change. On big endian 64-bit architectures we need to be
160 careful to copy the correct four bytes. */
163 #ifdef WORDS_BIGENDIAN
164 offset = sizeof (unsigned long) - 4;
168 memcpy (where, (char *)&addr + offset, 4);
172 /* By now we know that the address is not of the form d.d.d.d. Try
173 to find it in our cache of host addresses. */
174 addr_text = hash_table_get (host_name_address_map, hostname);
177 DEBUGP (("Found %s in host_name_address_map: %s\n",
178 hostname, addr_text));
179 addr = (unsigned long)inet_addr (addr_text);
183 /* Maybe this host is known to us under another name. If so, we'll
184 find it in host_slave_master_map, and use the master name to find
185 its address in host_name_address_map. */
186 canonical_name = hash_table_get (host_slave_master_map, hostname);
189 addr_text = hash_table_get (host_name_address_map, canonical_name);
190 assert (addr_text != NULL);
191 DEBUGP (("Found %s as slave of %s -> %s\n",
192 hostname, canonical_name, addr_text));
193 addr = (unsigned long)inet_addr (addr_text);
197 /* Since all else has failed, let's try gethostbyname(). Note that
198 we use gethostbyname() rather than ngethostbyname(), because we
199 already know that the address is not numerical. */
200 hptr = gethostbyname (hostname);
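203 /* Copy the address of the host to socket description.
NOTE(review): the copy below is sized by hptr->h_length, and the
assertion that h_length is 4 only runs after it. */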
204 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
205 assert (hptr->h_length == 4);
207 /* Now that we've gone through the trouble of calling
208 gethostbyname(), we can store this valuable information to the
209 cache. First, we have to look for it by address to know if it's
210 already in the cache by another name. */
211 /* Originally, we copied to in.s_addr, but it appears to be missing on some systems. */
213 memcpy (&in, *hptr->h_addr_list, sizeof (in));
214 inet_s = inet_ntoa (in);
215 add_host_to_cache (hostname, inet_s);
219 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
220 is referenced by more than one name, "real" name is considered to
221 be the first one encountered in the past.

Every return visible in this listing hands back a newly duplicated
string (xstrdup() or xstrdup_lower()), never a pointer stored in one
of the hash tables. */
223 realhost (const char *host)
226 struct hostent *hptr;
/* First choice: HOST is itself a key of host_name_address_map. */
229 DEBUGP (("Checking for %s in host_name_address_map.\n", host));
230 if (hash_table_contains (host_name_address_map, host))
232 DEBUGP (("Found; %s was already used, by that name.\n", host));
233 return xstrdup_lower (host);
/* Second choice: HOST is recorded as a slave of some master name. */
236 DEBUGP (("Checking for %s in host_slave_master_map.\n", host));
237 master_name = hash_table_get (host_slave_master_map, host);
241 DEBUGP (("Found; %s was already used, by the name %s.\n",
243 return xstrdup (master_name);
246 DEBUGP (("First time I hear about %s by that name; looking it up.\n",
248 hptr = ngethostbyname (host);
252 /* Originally, we copied to in.s_addr, but it appears to be
253 missing on some systems. */
254 memcpy (&in, *hptr->h_addr_list, sizeof (in));
/* The textual address from inet_ntoa() is passed straight on;
add_host_to_cache() duplicates it before storing. */
255 inet_s = inet_ntoa (in);
257 add_host_to_cache (host, inet_s);
259 /* add_host_to_cache() can establish a slave-master mapping. */
260 DEBUGP (("Checking again for %s in host_slave_master_map.\n", host));
261 master_name = hash_table_get (host_slave_master_map, host);
266 return xstrdup_lower (host);
269 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
270 taking care of aliases. It uses realhost() to determine a unique
271 hostname for each of two hosts. If opt.simple_check is non-zero, only
272 strcasecmp() on the extracted host parts is used for comparison. */
274 same_host (const char *u1, const char *u2)
280 /* Skip protocol, if present. */
281 u1 += url_skip_scheme (u1);
282 u2 += url_skip_scheme (u2);
284 /* Skip username and password, if present. */
285 u1 += url_skip_uname (u1);
286 u2 += url_skip_uname (u2);
/* Isolate each host part: S remembers the start, the empty-bodied loop
advances to the first '/', ':' or end of string, and the span between
the two pointers is handed to strdupdelim(). */
288 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
289 p1 = strdupdelim (s, u1);
290 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
291 p2 = strdupdelim (s, u2);
292 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
/* Cheap test first: a case-insensitive match of the literal names. */
293 if (strcasecmp (p1, p2) == 0)
297 DEBUGP (("They are quite alike.\n"));
300 else if (opt.simple_check)
304 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
/* Otherwise compare the names realhost() reports for each host. */
307 real1 = realhost (p1);
308 real2 = realhost (p2);
311 if (strcasecmp (real1, real2) == 0)
313 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
320 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
327 /* Determine whether a URL is acceptable to be followed, according to
328 a list of domains to accept.

U->host must be non-NULL (asserted).  The checks visible here run
U->host through sufmatch() against opt.domains and, when
opt.exclude_domains is set, against that list as well.  (The return
statements are not visible in this listing.) */
330 accept_domain (struct urlinfo *u)
332 assert (u->host != NULL);
335 if (!sufmatch ((const char **)opt.domains, u->host))
338 if (opt.exclude_domains)
340 if (sufmatch ((const char **)opt.exclude_domains, u->host))
346 /* Check whether WHAT is matched in LIST, each element of LIST being a
347 pattern to match WHAT against, using backward matching (see
348 match_backwards() in utils.c).
350 If an element of LIST matched, 1 is returned, 0 otherwise.

LIST is walked until its first NULL entry, and characters are compared
case-insensitively through TOLOWER(). */
352 sufmatch (const char **list, const char *what)
357 for (i = 0; list[i]; i++)
/* J starts at strlen (list[i]), i.e. on the pattern's terminating NUL,
and K starts at LW (presumably strlen (what); its assignment is not
visible in this listing -- TODO confirm), so the two strings are
compared from their ends toward the front. */
359 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
360 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
362 /* The domain must be the first to reach its beginning. */
369 /* Print error messages for host errors. */
373 /* Can't use switch since some constants are equal (at least on my
374 system), and the compiler signals "duplicate case value". */
375 if (error == HOST_NOT_FOUND
376 || error == NO_RECOVERY
378 || error == NO_ADDRESS
379 || error == TRY_AGAIN)
380 return _("Host not found");
382 return _("Unknown error");
388 /* host_name_address_map and host_address_name_map share the
389 strings. Because of that, calling free_keys_and_values once
390 suffices for both. */
391 free_keys_and_values (host_name_address_map);
392 hash_table_destroy (host_name_address_map);
393 hash_table_destroy (host_address_name_map);
394 free_keys_and_values (host_slave_master_map);
395 hash_table_destroy (host_slave_master_map);
401 host_name_address_map = make_string_hash_table (0);
402 host_address_name_map = make_string_hash_table (0);
403 host_slave_master_map = make_string_hash_table (0);