1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
37 # include <arpa/inet.h>
41 #ifdef HAVE_SYS_UTSNAME_H
42 # include <sys/utsname.h>
56 /* Maps each known host name to its address in text form (n.n.n.n). */
57 struct hash_table *host_name_address_map;
59 /* Maps each known address (n.n.n.n) to its host name. This is the
60 inverse of host_name_address_map. These two tables share the
61 strdup'ed strings, so the strings must be freed only once. */
62 struct hash_table *host_address_name_map;
64 /* Maps auxiliary (slave) host names to their master host names. */
65 struct hash_table *host_slave_master_map;
67 /* Utility function: like xstrdup(), but the copy of S that is made
   is also lowercased. S itself is declared const and is only read. */
70 xstrdup_lower (const char *s)
72 char *copy = xstrdup (s);
79 /* The same as gethostbyname(), but also supports internet addresses
80 of the form `N.N.N.N'. On some systems gethostbyname() knows how to
81 do this automatically.
   NAME is first converted with inet_addr(); the lookup is then done
   either by address with gethostbyaddr() or by name with
   gethostbyname(), presumably depending on whether the conversion
   succeeded -- confirm.
   NOTE(review): sizeof (addr) is passed to gethostbyaddr() as the
   address length. If ADDR is an unsigned long wider than four bytes,
   that is not the size of an AF_INET address -- confirm how ADDR is
   declared. */
83 ngethostbyname (const char *name)
88 addr = (unsigned long)inet_addr (name);
90 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
92 hp = gethostbyname (name);
96 /* Add host name HOST with the address ADDR_TEXT to the cache.
97 Normally this means that the (HOST, ADDR_TEXT) pair will be added to
98 host_name_address_map and, reversed, to host_address_name_map. (It
99 is the caller's responsibility to make sure that HOST is not already
100 in host_name_address_map.)
102 If the ADDR_TEXT has already been seen and belongs to another host,
103 HOST will be added to host_slave_master_map instead, mapped to the
   name found for ADDR_TEXT in host_address_name_map.
   HOST is lowercased when it is copied. The tables store copies, so
   the caller keeps ownership of both arguments. */
106 add_host_to_cache (const char *host, const char *addr_text)
108 char *canonical_name = hash_table_get (host_address_name_map, addr_text);
111 DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
112 host, canonical_name));
113 /* We've already dealt with that host under another name. */
114 hash_table_put (host_slave_master_map,
115 xstrdup_lower (host),
116 xstrdup_lower (canonical_name));
120 /* This is really the first time we're dealing with that host. */
121 char *h_copy = xstrdup_lower (host);
122 char *a_copy = xstrdup (addr_text);
123 DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
   /* Both tables hold the same two strings, with key and value swapped. */
124 hash_table_put (host_name_address_map, h_copy, a_copy);
125 hash_table_put (host_address_name_map, a_copy, h_copy);
129 /* Store the address of HOSTNAME, internet-style (four octets in
130 network order), to WHERE. First try to get the address from the
131 cache; if it is not available, call the DNS functions and update
   the cache with the result.
134 Return 1 on successful finding of the hostname, 0 otherwise. */
136 store_hostaddress (unsigned char *where, const char *hostname)
140 char *canonical_name;
141 struct hostent *hptr;
145 /* If the address is of the form d.d.d.d, there will be no trouble
147 addr = (unsigned long)inet_addr (hostname);
148 /* If we have the numeric address, just store it. */
151 /* ADDR is defined to be in network byte order, meaning the code
152 works on little and big endian 32-bit architectures without
153 change. On big endian 64-bit architectures we need to be
154 careful to copy the correct four bytes. */
157 #ifdef WORDS_BIGENDIAN
158 offset = sizeof (unsigned long) - 4;
162 memcpy (where, (char *)&addr + offset, 4);
166 /* By now we know that the address is not of the form d.d.d.d. Try
167 to find it in our cache of host addresses. */
168 addr_text = hash_table_get (host_name_address_map, hostname);
171 DEBUGP (("Found %s in host_name_address_map: %s\n",
172 hostname, addr_text));
173 addr = (unsigned long)inet_addr (addr_text);
177 /* Maybe this host is known to us under another name. If so, we'll
178 find it in host_slave_master_map, and use the master name to find
179 its address in host_name_address_map. */
180 canonical_name = hash_table_get (host_slave_master_map, hostname);
183 addr_text = hash_table_get (host_name_address_map, canonical_name);
184 assert (addr_text != NULL);
185 DEBUGP (("Found %s as slave of %s -> %s\n",
186 hostname, canonical_name, addr_text));
187 addr = (unsigned long)inet_addr (addr_text);
191 /* Since all else has failed, let's try gethostbyname(). Note that
192 we use gethostbyname() rather than ngethostbyname(), because we
193 already know that the address is not numerical. */
194 hptr = gethostbyname (hostname);
197 /* Copy the address of the host to socket description.
   NOTE(review): h_length is used by this memcpy before the assert
   below checks that it equals 4 -- consider checking first. */
198 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
199 assert (hptr->h_length == 4);
201 /* Now that we've gone through the trouble of calling
202 gethostbyname(), we can store this valuable information to the
203 cache. First, we have to look for it by address to know if it's
204 already in the cache by another name. */
205 /* Originally, we copied to in.s_addr, but it appears to be missing
207 memcpy (&in, *hptr->h_addr_list, sizeof (in));
208 inet_s = inet_ntoa (in);
209 add_host_to_cache (hostname, inet_s);
213 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
214 is referenced by more than one name, the "real" name is considered
215 to be the first one encountered in the past.
   HOST is looked up in host_name_address_map, then in
   host_slave_master_map, and only then resolved with ngethostbyname()
   and added to the cache with add_host_to_cache().
   Every return visible here hands back a fresh xstrdup() or
   xstrdup_lower() copy; presumably the caller frees it -- confirm
   against the callers. */
217 realhost (const char *host)
220 struct hostent *hptr;
223 DEBUGP (("Checking for %s in host_name_address_map.\n", host));
224 if (hash_table_exists (host_name_address_map, host))
226 DEBUGP (("Found; %s was already used, by that name.\n", host));
227 return xstrdup_lower (host);
230 DEBUGP (("Checking for %s in host_slave_master_map.\n", host));
231 master_name = hash_table_get (host_slave_master_map, host);
235 DEBUGP (("Found; %s was already used, by the name %s.\n",
237 return xstrdup (master_name);
240 DEBUGP (("First time I hear about %s by that name; looking it up.\n",
242 hptr = ngethostbyname (host);
246 /* Originally, we copied to in.s_addr, but it appears to be
247 missing on some systems. */
248 memcpy (&in, *hptr->h_addr_list, sizeof (in));
249 inet_s = inet_ntoa (in);
251 add_host_to_cache (host, inet_s);
253 /* add_host_to_cache() can establish a slave-master mapping. */
254 DEBUGP (("Checking again for %s in host_slave_master_map.\n", host));
255 master_name = hash_table_get (host_slave_master_map, host);
260 return xstrdup_lower (host);
263 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
264 taking care of aliases. It uses realhost() to determine a unique
265 hostname for each of two hosts. If opt.simple_check is non-zero,
266 only strcasecmp() is used for comparison. */
268 same_host (const char *u1, const char *u2)
274 /* Skip protocol, if present. */
275 u1 += skip_proto (u1);
276 u2 += skip_proto (u2);
278 /* Skip username and password, if present. */
279 u1 += skip_uname (u1);
280 u2 += skip_uname (u2);
   /* Isolate the host part of each argument: everything up to the
      first '/' or ':', or up to the end of the string. */
282 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
283 p1 = strdupdelim (s, u1);
284 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
285 p2 = strdupdelim (s, u2);
286 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
287 if (strcasecmp (p1, p2) == 0)
291 DEBUGP (("They are quite alike.\n"));
294 else if (opt.simple_check)
298 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
301 real1 = realhost (p1);
302 real2 = realhost (p2);
305 if (strcasecmp (real1, real2) == 0)
307 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
314 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
321 /* Determine whether a URL is acceptable to be followed, according to
322 the list of domains to accept (opt.domains) and, when it is set,
   the list of domains to reject (opt.exclude_domains). U->host is
   matched against both lists with sufmatch() and must not be NULL. */
324 accept_domain (struct urlinfo *u)
326 assert (u->host != NULL);
329 if (!sufmatch ((const char **)opt.domains, u->host))
332 if (opt.exclude_domains)
334 if (sufmatch ((const char **)opt.exclude_domains, u->host))
340 /* Check whether WHAT is matched in LIST, each element of LIST being a
341 pattern to match WHAT against, using backward matching (see
342 match_backwards() in utils.c). Characters are compared
   case-insensitively through TOLOWER, and LIST must end with a NULL
   element.
344 If an element of LIST matched, 1 is returned, 0 otherwise. */
346 sufmatch (const char **list, const char *what)
351 for (i = 0; list[i]; i++)
   /* Walk the pattern and WHAT backwards in step. J starts at
      strlen (list[i]), i.e. on the pattern's terminating NUL;
      presumably LW is strlen (WHAT) -- confirm. */
353 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
354 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
356 /* The domain must be the first to reach its beginning. */
363 /* Return email address of the form username@FQDN suitable for
364 anonymous FTP passwords. This process is error-prone, and the
365 escape hatch is the MY_HOST preprocessor constant, which can be
366 used to hard-code either your hostname or FQDN at compile-time.
368 If the FQDN cannot be determined, a warning is printed, and the
369 function returns the short `username@' form.
372 The returned string is generated by malloc() and should be freed
   after use.
375 If not even the username can be determined, something is
376 seriously wrong, and Wget exits. */
378 ftp_getaddress (void)
380 static char *address;
382 /* Do the drill only the first time, as it won't change; the result
   is kept in the static ADDRESS. */
385 char userid[32]; /* 9 should be enough for Unix, but
386 I'd rather be on the safe side. */
389 if (!pwd_cuserid (userid))
391 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
396 STRDUP_ALLOCA (host, MY_HOST);
397 #else /* not MY_HOST */
401 if (uname (&ubuf) < 0)
403 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
404 exec_name, strerror (errno));
408 STRDUP_ALLOCA (host, ubuf.nodename);
410 #else /* not HAVE_UNAME */
411 #ifdef HAVE_GETHOSTNAME
   /* NOTE(review): 256 is a hard-coded buffer length -- confirm that
      it matches the size HOST is given on this path. */
413 if (gethostname (host, 256) < 0)
415 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
420 #else /* not HAVE_GETHOSTNAME */
421 #error Cannot determine host name.
422 #endif /* not HAVE_GETHOSTNAME */
423 #endif /* not HAVE_UNAME */
424 #endif /* not MY_HOST */
425 /* If the address we got so far contains a period, don't bother
427 if (strchr (host, '.'))
431 /* #### I've seen the following scheme fail on at least one
432 system! Do we care? */
434 /* According to Richard Stevens, the correct way to find the
435 FQDN is to (1) find the host name, (2) find its IP
436 address using gethostbyname(), and (3) get the FQDN using
437 gethostbyaddr(). So that's what we'll do. Step one has
440 struct hostent *hp = gethostbyname (host);
441 if (!hp || !hp->h_addr_list)
443 logprintf (LOG_ALWAYS, _("\
444 %s: Warning: cannot determine local IP address.\n"),
449 /* Copy the argument, so the call to gethostbyaddr doesn't
450 clobber it -- just in case. */
451 tmpstore = (char *)alloca (hp->h_length);
452 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
454 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
455 if (!hp || !hp->h_name)
457 logprintf (LOG_ALWAYS, _("\
458 %s: Warning: cannot reverse-lookup local IP address.\n"),
463 if (!strchr (hp->h_name, '.'))
466 /* This gets ticked pretty often. Karl Berry reports
467 that there can be valid reasons for the local host
468 name not to be an FQDN, so I've decided to remove the
470 logprintf (LOG_ALWAYS, _("\
471 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
477 /* Once we're here, hp->h_name contains the correct FQDN. */
478 STRDUP_ALLOCA (fqdn, hp->h_name);
   /* Room for USERID, the '@', FQDN and the terminating NUL. */
481 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
482 sprintf (address, "%s@%s", userid, fqdn);
487 /* Return a message string, wrapped in _(), for the host lookup error
   code ERROR: "Host not found" for the codes tested below, and
   "Unknown error" for anything else. */
491 /* Can't use switch since some constants are equal (at least on my
492 system), and the compiler signals "duplicate case value". */
493 if (error == HOST_NOT_FOUND
494 || error == NO_RECOVERY
496 || error == NO_ADDRESS
497 || error == TRY_AGAIN)
498 return _("Host not found");
500 return _("Unknown error");
506 /* host_name_address_map and host_address_name_map share the
507 strings. Because of that, calling free_keys_and_values once
508 suffices for both. host_slave_master_map holds its own strings,
   so it is freed separately. */
509 free_keys_and_values (host_name_address_map);
510 hash_table_destroy (host_name_address_map);
511 hash_table_destroy (host_address_name_map);
512 free_keys_and_values (host_slave_master_map);
513 hash_table_destroy (host_slave_master_map);
/* Set up the three host tables with make_string_hash_table().
   NOTE(review): the 0 argument presumably requests a default initial
   size -- confirm against make_string_hash_table(). */
519 host_name_address_map = make_string_hash_table (0);
520 host_address_name_map = make_string_hash_table (0);
521 host_slave_master_map = make_string_hash_table (0);