1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
37 # include <arpa/inet.h>
41 #ifdef HAVE_SYS_UTSNAME_H
42 # include <sys/utsname.h>
56 /* Mapping from all known host names (stored lowercased) to their addresses (n.n.n.n). */
57 struct hash_table *host_name_address_map;
59 /* Mapping from all known addresses (n.n.n.n) to their host names. This
60 is the inverse of host_name_address_map. These two tables share
61 the strdup'ed strings. */
62 struct hash_table *host_address_name_map;
64 /* Mapping from auxiliary (slave) host names to their master host names. */
65 struct hash_table *host_slave_master_map;
67 /* Utility function: like xstrdup(), but also lowercases S. */
70 xstrdup_lower (const char *s)
72 char *copy = xstrdup (s);
79 /* The same as gethostbyname, but supports internet addresses of the
80 form `N.N.N.N'. On some systems gethostbyname() knows how to do
81 this automatically. */
83 ngethostbyname (const char *name)
88 addr = (unsigned long)inet_addr (name);
90 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
92 hp = gethostbyname (name);
96 /* Add host name HOST with the address ADDR_TEXT to the cache.
97 Normally this means that the (HOST, ADDR_TEXT) pair will be added to
98 host_name_address_map and to host_address_name_map. (It is the
99 caller's responsibility to make sure that HOST is not already in
100 host_name_address_map.)
102 If the ADDR_TEXT has already been seen and belongs to another host,
103 HOST will be added to host_slave_master_map instead. */
106 add_host_to_cache (const char *host, const char *addr_text)
108 char *canonical_name = hash_table_get (host_address_name_map, addr_text);
111 DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
112 host, canonical_name));
113 /* We've already dealt with that host under another name. */
114 hash_table_put (host_slave_master_map,
115 xstrdup_lower (host),
116 xstrdup_lower (canonical_name));
120 /* This is really the first time we're dealing with that host. */
121 char *h_copy = xstrdup_lower (host);
122 char *a_copy = xstrdup (addr_text);
123 DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
124 hash_table_put (host_name_address_map, h_copy, a_copy);
125 hash_table_put (host_address_name_map, a_copy, h_copy);
129 /* Store the address of HOSTNAME, internet-style (four octets in
130 network order), to WHERE. First try to get the address from the
131 cache; if it is not available, call the DNS functions and update
134 Return 1 on successful finding of the hostname, 0 otherwise. */
136 store_hostaddress (unsigned char *where, const char *hostname)
140 char *canonical_name;
141 struct hostent *hptr;
145 /* If the address is of the form d.d.d.d, there will be no trouble
147 addr = (unsigned long)inet_addr (hostname);
148 /* If we have the numeric address, just store it. */
151 /* ADDR is defined to be in network byte order, meaning the code
152 works on little and big endian 32-bit architectures without
153 change. On big endian 64-bit architectures we need to be
154 careful to copy the correct four bytes. */
157 #ifdef WORDS_BIGENDIAN
158 offset = sizeof (unsigned long) - 4;
162 memcpy (where, (char *)&addr + offset, 4);
166 /* By now we know that the address is not of the form d.d.d.d. Try
167 to find it in our cache of host addresses. */
168 addr_text = hash_table_get (host_name_address_map, hostname);
171 DEBUGP (("Found %s in host_name_address_map: %s\n",
172 hostname, addr_text));
173 addr = (unsigned long)inet_addr (addr_text);
177 /* Maybe this host is known to us under another name. If so, we'll
178 find it in host_slave_master_map, and use the master name to find
179 its address in host_name_address_map. */
180 canonical_name = hash_table_get (host_slave_master_map, hostname);
183 addr_text = hash_table_get (host_name_address_map, canonical_name);
184 assert (addr_text != NULL);
185 DEBUGP (("Found %s as slave of %s -> %s\n",
186 hostname, canonical_name, addr_text));
187 addr = (unsigned long)inet_addr (addr_text);
191 /* Since all else has failed, let's try gethostbyname(). Note that
192 we use gethostbyname() rather than ngethostbyname(), because we
193 already know that the address is not numerical. */
194 hptr = gethostbyname (hostname);
197 /* Copy the address of the host to socket description. */
198 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
199 assert (hptr->h_length == 4);
201 /* Now that we've gone through the trouble of calling
202 gethostbyname(), we can store this valuable information to the
203 cache. First, we have to look for it by address to know if it's
204 already in the cache by another name. */
205 /* Originally, we copied to in.s_addr, but it appears to be missing
207 memcpy (&in, *hptr->h_addr_list, sizeof (in));
208 inet_s = inet_ntoa (in);
209 add_host_to_cache (hostname, inet_s);
213 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
214 is referenced by more than one name, the "real" name is considered to
215 be the first one encountered in the past. */
217 realhost (const char *host)
220 struct hostent *hptr;
223 DEBUGP (("Checking for %s in host_name_address_map.\n", host));
224 if (hash_table_contains (host_name_address_map, host))
226 DEBUGP (("Found; %s was already used, by that name.\n", host));
227 return xstrdup_lower (host);
230 DEBUGP (("Checking for %s in host_slave_master_map.\n", host));
231 master_name = hash_table_get (host_slave_master_map, host);
235 DEBUGP (("Found; %s was already used, by the name %s.\n",
237 return xstrdup (master_name);
240 DEBUGP (("First time I hear about %s by that name; looking it up.\n",
242 hptr = ngethostbyname (host);
246 /* Originally, we copied to in.s_addr, but it appears to be
247 missing on some systems. */
248 memcpy (&in, *hptr->h_addr_list, sizeof (in));
249 inet_s = inet_ntoa (in);
251 add_host_to_cache (host, inet_s);
253 /* add_host_to_cache() can establish a slave-master mapping. */
254 DEBUGP (("Checking again for %s in host_slave_master_map.\n", host));
255 master_name = hash_table_get (host_slave_master_map, host);
260 return xstrdup_lower (host);
263 /* Compare two hostnames (out of URLs if the arguments are URLs),
264 taking care of aliases. It uses realhost() to determine a unique
265 hostname for each of two hosts. If opt.simple_check is non-zero, only
266 strcasecmp() is used for comparison. */
268 same_host (const char *u1, const char *u2)
274 /* Skip protocol, if present. */
275 u1 += skip_proto (u1);
276 u2 += skip_proto (u2);
278 /* Skip username and password, if present. */
279 u1 += skip_uname (u1);
280 u2 += skip_uname (u2);
282 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
283 p1 = strdupdelim (s, u1);
284 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
285 p2 = strdupdelim (s, u2);
286 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
287 if (strcasecmp (p1, p2) == 0)
291 DEBUGP (("They are quite alike.\n"));
294 else if (opt.simple_check)
298 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
301 real1 = realhost (p1);
302 real2 = realhost (p2);
305 if (strcasecmp (real1, real2) == 0)
307 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
314 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
321 /* Determine whether a URL is acceptable to be followed, according to
322 a list of domains to accept. */
324 accept_domain (struct urlinfo *u)
326 assert (u->host != NULL);
329 if (!sufmatch ((const char **)opt.domains, u->host))
332 if (opt.exclude_domains)
334 if (sufmatch ((const char **)opt.exclude_domains, u->host))
340 /* Check whether WHAT is matched in LIST, each element of LIST being a
341 pattern to match WHAT against, using backward matching (see
342 match_backwards() in utils.c).
344 If an element of LIST matched, 1 is returned, 0 otherwise. */
346 sufmatch (const char **list, const char *what)
351 for (i = 0; list[i]; i++)
353 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
354 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
356 /* The domain must be the first to reach its beginning. */
363 /* Print error messages for host errors. */
367 /* Can't use switch since some constants are equal (at least on my
368 system), and the compiler signals "duplicate case value". */
369 if (error == HOST_NOT_FOUND
370 || error == NO_RECOVERY
372 || error == NO_ADDRESS
373 || error == TRY_AGAIN)
374 return _("Host not found");
376 return _("Unknown error");
382 /* host_name_address_map and host_address_name_map share the
383 strings. Because of that, calling free_keys_and_values once
384 suffices for both. */
385 free_keys_and_values (host_name_address_map);
386 hash_table_destroy (host_name_address_map);
387 hash_table_destroy (host_address_name_map);
388 free_keys_and_values (host_slave_master_map);
389 hash_table_destroy (host_slave_master_map);
395 host_name_address_map = make_string_hash_table (0);
396 host_address_name_map = make_string_hash_table (0);
397 host_slave_master_map = make_string_hash_table (0);