1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
38 # include <arpa/inet.h>
42 #ifdef HAVE_SYS_UTSNAME_H
43 # include <sys/utsname.h>
57 /* Mapping between all known hosts to their addresses (n.n.n.n). */
58 struct hash_table *host_name_address_map;
60 /* Mapping between all known addresses (n.n.n.n) to their hosts. This
61 is the inverse of host_name_address_map. These two tables share
62 the strdup'ed strings. */
63 struct hash_table *host_address_name_map;
65 /* Mapping between auxiliary (slave) and master host names. */
66 struct hash_table *host_slave_master_map;
68 /* Utility function: like xstrdup(), but also lowercases S. */
71 xstrdup_lower (const char *s)
73 char *copy = xstrdup (s);
80 /* The same as gethostbyname, but supports internet addresses of the
81 form `N.N.N.N'. On some systems gethostbyname() knows how to do
82 this automatically. */
84 ngethostbyname (const char *name)
89 addr = (unsigned long)inet_addr (name);
91 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
93 hp = gethostbyname (name);
97 /* Add host name HOST with the address ADDR_TEXT to the cache.
98 Normally this means that the (HOST, ADDR_TEXT) pair will be added to
99 host_name_address_map and to host_address_name_map. (It is the
100 caller's responsibility to make sure that HOST is not already in
101 host_name_address_map.)
103 If the ADDR_TEXT has already been seen and belongs to another host,
104 HOST will be added to host_slave_master_map instead. */
107 add_host_to_cache (const char *host, const char *addr_text)
109 char *canonical_name = hash_table_get (host_address_name_map, addr_text);
112 DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
113 host, canonical_name));
114 /* We've already dealt with that host under another name. */
115 hash_table_put (host_slave_master_map,
116 xstrdup_lower (host),
117 xstrdup_lower (canonical_name));
121 /* This is really the first time we're dealing with that host. */
122 char *h_copy = xstrdup_lower (host);
123 char *a_copy = xstrdup (addr_text);
124 DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
125 hash_table_put (host_name_address_map, h_copy, a_copy);
126 hash_table_put (host_address_name_map, a_copy, h_copy);
130 /* Store the address of HOSTNAME, internet-style (four octets in
131 network order), to WHERE. First try to get the address from the
132 cache; if it is not available, call the DNS functions and update
135 Return 1 on successful finding of the hostname, 0 otherwise. */
137 store_hostaddress (unsigned char *where, const char *hostname)
141 char *canonical_name;
142 struct hostent *hptr;
146 /* If the address is of the form d.d.d.d, there will be no trouble
148 addr = (unsigned long)inet_addr (hostname);
149 /* If we have the numeric address, just store it. */
152 /* ADDR is defined to be in network byte order, meaning the code
153 works on little and big endian 32-bit architectures without
154 change. On big endian 64-bit architectures we need to be
155 careful to copy the correct four bytes. */
158 #ifdef WORDS_BIGENDIAN
159 offset = sizeof (unsigned long) - 4;
163 memcpy (where, (char *)&addr + offset, 4);
167 /* By now we know that the address is not of the form d.d.d.d. Try
168 to find it in our cache of host addresses. */
169 addr_text = hash_table_get (host_name_address_map, hostname);
172 DEBUGP (("Found %s in host_name_address_map: %s\n",
173 hostname, addr_text));
174 addr = (unsigned long)inet_addr (addr_text);
178 /* Maybe this host is known to us under another name. If so, we'll
179 find it in host_slave_master_map, and use the master name to find
180 its address in host_name_address_map. */
181 canonical_name = hash_table_get (host_slave_master_map, hostname);
184 addr_text = hash_table_get (host_name_address_map, canonical_name);
185 assert (addr_text != NULL);
186 DEBUGP (("Found %s as slave of %s -> %s\n",
187 hostname, canonical_name, addr_text));
188 addr = (unsigned long)inet_addr (addr_text);
192 /* Since all else has failed, let's try gethostbyname(). Note that
193 we use gethostbyname() rather than ngethostbyname(), because we
194 already know that the address is not numerical. */
195 hptr = gethostbyname (hostname);
198 /* Copy the address of the host to socket description. */
199 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
200 assert (hptr->h_length == 4);
202 /* Now that we've gone through the trouble of calling
203 gethostbyname(), we can store this valuable information to the
204 cache. First, we have to look for it by address to know if it's
205 already in the cache by another name. */
206 /* Originally, we copied to in.s_addr, but it appears to be missing
208 memcpy (&in, *hptr->h_addr_list, sizeof (in));
209 inet_s = inet_ntoa (in);
210 add_host_to_cache (hostname, inet_s);
214 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
215 is referenced by more than one name, "real" name is considered to
216 be the first one encountered in the past. */
218 realhost (const char *host)
221 struct hostent *hptr;
224 DEBUGP (("Checking for %s in host_name_address_map.\n", host));
225 if (hash_table_exists (host_name_address_map, host))
227 DEBUGP (("Found; %s was already used, by that name.\n", host));
228 return xstrdup_lower (host);
231 DEBUGP (("Checking for %s in host_slave_master_map.\n", host));
232 master_name = hash_table_get (host_slave_master_map, host);
236 DEBUGP (("Found; %s was already used, by the name %s.\n",
238 return xstrdup (master_name);
241 DEBUGP (("First time I hear about %s by that name; looking it up.\n",
243 hptr = ngethostbyname (host);
247 /* Originally, we copied to in.s_addr, but it appears to be
248 missing on some systems. */
249 memcpy (&in, *hptr->h_addr_list, sizeof (in));
250 inet_s = inet_ntoa (in);
252 add_host_to_cache (host, inet_s);
254 /* add_host_to_cache() can establish a slave-master mapping. */
255 DEBUGP (("Checking again for %s in host_slave_master_map.\n", host));
256 master_name = hash_table_get (host_slave_master_map, host);
261 return xstrdup_lower (host);
264 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
265 taking care of aliases. It uses realhost() to determine a unique
266 hostname for each of two hosts. If simple_check is non-zero, only
267 strcmp() is used for comparison. */
269 same_host (const char *u1, const char *u2)
275 /* Skip protocol, if present. */
278 u1 += skip_proto (u1);
279 u2 += skip_proto (u2);
281 /* Skip username and password, if present. */
282 u1 += skip_uname (u1);
283 u2 += skip_uname (u2);
285 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
286 p1 = strdupdelim (s, u1);
287 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
288 p2 = strdupdelim (s, u2);
289 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
290 if (strcasecmp (p1, p2) == 0)
294 DEBUGP (("They are quite alike.\n"));
297 else if (opt.simple_check)
301 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
304 real1 = realhost (p1);
305 real2 = realhost (p2);
308 if (strcasecmp (real1, real2) == 0)
310 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
317 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
324 /* Determine whether a URL is acceptable to be followed, according to
325 a list of domains to accept. */
327 accept_domain (struct urlinfo *u)
329 assert (u->host != NULL);
332 if (!sufmatch ((const char **)opt.domains, u->host))
335 if (opt.exclude_domains)
337 if (sufmatch ((const char **)opt.exclude_domains, u->host))
343 /* Check whether WHAT is matched in LIST, each element of LIST being a
344 pattern to match WHAT against, using backward matching (see
345 match_backwards() in utils.c).
347 If an element of LIST matched, 1 is returned, 0 otherwise. */
349 sufmatch (const char **list, const char *what)
354 for (i = 0; list[i]; i++)
356 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
357 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
359 /* The domain must be the first to reach the beginning. */
366 /* Return email address of the form username@FQDN suitable for
367 anonymous FTP passwords. This process is error-prone, and the
368 escape hatch is the MY_HOST preprocessor constant, which can be
369 used to hard-code either your hostname or FQDN at compile-time.
371 If the FQDN cannot be determined, a warning is printed, and the
372 function returns a short `username@' form, accepted by most
375 If not even the username can be divined, it means things are
376 seriously fucked up, and Wget exits. */
378 ftp_getaddress (void)
380 static char *address;
382 /* Do the drill only the first time, as it won't change. */
385 char userid[32]; /* 9 should be enough for Unix, but
386 I'd rather be on the safe side. */
389 if (!pwd_cuserid (userid))
391 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
396 STRDUP_ALLOCA (host, MY_HOST);
397 #else /* not MY_HOST */
401 if (uname (&ubuf) < 0)
403 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
404 exec_name, strerror (errno));
408 STRDUP_ALLOCA (host, ubuf.nodename);
410 #else /* not HAVE_UNAME */
411 #ifdef HAVE_GETHOSTNAME
413 if (gethostname (host, 256) < 0)
415 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
420 #else /* not HAVE_GETHOSTNAME */
421 #error Cannot determine host name.
422 #endif /* not HAVE_GETHOSTNAME */
423 #endif /* not HAVE_UNAME */
424 #endif /* not MY_HOST */
425 /* If the address we got so far contains a period, don't bother
427 if (strchr (host, '.'))
431 /* #### I've seen the following scheme fail on at least one
432 system! Do we care? */
434 /* According to Richard Stevens, the correct way to find the
435 FQDN is to (1) find the host name, (2) find its IP
436 address using gethostbyname(), and (3) get the FQDN using
437 gethostbyaddr(). So that's what we'll do. Step one has
440 struct hostent *hp = gethostbyname (host);
441 if (!hp || !hp->h_addr_list)
443 logprintf (LOG_ALWAYS, _("\
444 %s: Warning: cannot determine local IP address.\n"),
449 /* Copy the argument, so the call to gethostbyaddr doesn't
450 clobber it -- just in case. */
451 tmpstore = (char *)alloca (hp->h_length);
452 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
454 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
455 if (!hp || !hp->h_name)
457 logprintf (LOG_ALWAYS, _("\
458 %s: Warning: cannot reverse-lookup local IP address.\n"),
463 if (!strchr (hp->h_name, '.'))
466 /* This gets ticked pretty often. Karl Berry reports
467 that there can be valid reasons for the local host
468 name not to be an FQDN, so I've decided to remove the
470 logprintf (LOG_ALWAYS, _("\
471 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
477 /* Once we're here, hp->h_name contains the correct FQDN. */
478 STRDUP_ALLOCA (fqdn, hp->h_name);
481 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
482 sprintf (address, "%s@%s", userid, fqdn);
487 /* Print error messages for host errors. */
491 /* Can't use switch since some constants are equal (at least on my
492 system), and the compiler signals "duplicate case value". */
493 if (error == HOST_NOT_FOUND
494 || error == NO_RECOVERY
496 || error == NO_ADDRESS
497 || error == TRY_AGAIN)
498 return _("Host not found");
500 return _("Unknown error");
506 /* host_name_address_map and host_address_name_map share the
507 strings. Because of that, calling free_keys_and_values once
508 suffices for both. */
509 free_keys_and_values (host_name_address_map);
510 hash_table_destroy (host_name_address_map);
511 hash_table_destroy (host_address_name_map);
512 free_keys_and_values (host_slave_master_map);
513 hash_table_destroy (host_slave_master_map);
519 host_name_address_map = make_string_hash_table (0);
520 host_address_name_map = make_string_hash_table (0);
521 host_slave_master_map = make_string_hash_table (0);