1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
30 #include <sys/types.h>
35 # include <sys/socket.h>
36 # include <netinet/in.h>
37 # include <arpa/inet.h>
41 #ifdef HAVE_SYS_UTSNAME_H
42 # include <sys/utsname.h>
56 /* Mapping from each known host name to its address (n.n.n.n). */
57 struct hash_table *host_name_address_map;
59 /* Mapping from each known address (n.n.n.n) to its host name. This
60 is the inverse of host_name_address_map. These two tables share
61 the strdup'ed strings. */
62 struct hash_table *host_address_name_map;
64 /* Mapping from auxiliary (slave) host names to master host names. */
65 struct hash_table *host_slave_master_map;
67 /* Utility function: like xstrdup(), but the returned copy is
   lowercased. S is const-qualified; the duplicate made by xstrdup()
   is what gets modified. */
70 xstrdup_lower (const char *s)
72 char *copy = xstrdup (s);
79 /* The same as gethostbyname, but supports internet addresses of the
80 form `N.N.N.N'. On some systems gethostbyname() knows how to do
81 this automatically.

   Uses inet_addr() to parse NAME, gethostbyaddr() for the parsed
   address and gethostbyname() for NAME itself. */
83 ngethostbyname (const char *name)
88 addr = (unsigned long)inet_addr (name);
/* NOTE(review): the cast above suggests ADDR is an unsigned long. If
   so, sizeof (addr) is 8 on LP64 platforms while an AF_INET address
   is 4 bytes long -- confirm the length and the bytes passed here. */
90 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
92 hp = gethostbyname (name);
96 /* Add host name HOST with the address ADDR_TEXT to the cache.
97 Normally this means that the (HOST, ADDR_TEXT) pair will be added to
98 host_name_address_map and to host_address_name_map. (It is the
99 caller's responsibility to make sure that HOST is not already in
100 host_name_address_map.)
102 If the ADDR_TEXT has already been seen and belongs to another host,
103 HOST will be added to host_slave_master_map instead.

   HOST and ADDR_TEXT are duplicated before being stored, so the
   tables never hold the caller's own pointers. Host names are stored
   lowercased. */
106 add_host_to_cache (const char *host, const char *addr_text)
/* A hit in the address-to-name table means this address is already
   cached under some other (canonical) name. */
108 char *canonical_name = hash_table_get (host_address_name_map, addr_text);
111 DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
112 host, canonical_name));
113 /* We've already dealt with that host under another name. */
114 hash_table_put (host_slave_master_map,
115 xstrdup_lower (host),
116 xstrdup_lower (canonical_name));
120 /* This is really the first time we're dealing with that host. */
121 char *h_copy = xstrdup_lower (host);
122 char *a_copy = xstrdup (addr_text);
123 DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
/* The same two copies serve as key and value in both directions. */
124 hash_table_put (host_name_address_map, h_copy, a_copy);
125 hash_table_put (host_address_name_map, a_copy, h_copy);
129 /* Store the address of HOSTNAME, internet-style (four octets in
130 network order), to WHERE. First try to get the address from the
131 cache; if it is not available, call the DNS functions and update
   the cache. WHERE must have room for four bytes.

134 Return 1 on successful finding of the hostname, 0 otherwise. */
136 store_hostaddress (unsigned char *where, const char *hostname)
140 char *canonical_name;
141 struct hostent *hptr;
145 /* If the address is of the form d.d.d.d, there will be no trouble
147 addr = (unsigned long)inet_addr (hostname);
148 /* If we have the numeric address, just store it. */
151 /* ADDR is defined to be in network byte order, meaning the code
152 works on little and big endian 32-bit architectures without
153 change. On big endian 64-bit architectures we need to be
154 careful to copy the correct four bytes. */
157 #ifdef WORDS_BIGENDIAN
158 offset = sizeof (unsigned long) - 4;
162 memcpy (where, (char *)&addr + offset, 4);
166 /* By now we know that the address is not of the form d.d.d.d. Try
167 to find it in our cache of host addresses. */
168 addr_text = hash_table_get (host_name_address_map, hostname);
171 DEBUGP (("Found %s in host_name_address_map: %s\n",
172 hostname, addr_text));
/* The cache keeps the address as text, so convert it back. */
173 addr = (unsigned long)inet_addr (addr_text);
177 /* Maybe this host is known to us under another name. If so, we'll
178 find it in host_slave_master_map, and use the master name to find
179 its address in host_name_address_map. */
180 canonical_name = hash_table_get (host_slave_master_map, hostname);
183 addr_text = hash_table_get (host_name_address_map, canonical_name);
/* add_host_to_cache() creates a slave entry only for an address that
   is already cached under its master name, so the master's address
   is expected to be present. */
184 assert (addr_text != NULL);
185 DEBUGP (("Found %s as slave of %s -> %s\n",
186 hostname, canonical_name, addr_text));
187 addr = (unsigned long)inet_addr (addr_text);
191 /* Since all else has failed, let's try gethostbyname(). Note that
192 we use gethostbyname() rather than ngethostbyname(), because we
193 already know that the address is not numerical. */
194 hptr = gethostbyname (hostname);
197 /* Copy the address of the host to socket description. */
/* NOTE(review): the length is asserted only after h_length bytes have
   already been copied into WHERE; if h_length can differ from 4 the
   check belongs before the memcpy. */
198 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
199 assert (hptr->h_length == 4);
201 /* Now that we've gone through the trouble of calling
202 gethostbyname(), we can store this valuable information to the
203 cache. First, we have to look for it by address to know if it's
204 already in the cache by another name. */
205 /* Originally, we copied to in.s_addr, but it appears to be missing
207 memcpy (&in, *hptr->h_addr_list, sizeof (in));
208 inet_s = inet_ntoa (in);
209 add_host_to_cache (hostname, inet_s);
213 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
214 is referenced by more than one name, "real" name is considered to
215 be the first one encountered in the past.

   The return statements hand back a copy made with xstrdup() or
   xstrdup_lower() rather than a pointer into the cache tables. */
217 realhost (const char *host)
220 struct hostent *hptr;
223 DEBUGP (("Checking for %s in host_name_address_map.\n", host));
224 if (hash_table_exists (host_name_address_map, host))
226 DEBUGP (("Found; %s was already used, by that name.\n", host));
227 return xstrdup_lower (host);
230 DEBUGP (("Checking for %s in host_slave_master_map.\n", host));
231 master_name = hash_table_get (host_slave_master_map, host);
235 DEBUGP (("Found; %s was already used, by the name %s.\n",
237 return xstrdup (master_name);
/* HOST is in neither table. ngethostbyname() is used for the lookup,
   presumably because HOST may be a numeric address here. */
240 DEBUGP (("First time I hear about %s by that name; looking it up.\n",
242 hptr = ngethostbyname (host);
246 /* Originally, we copied to in.s_addr, but it appears to be
247 missing on some systems. */
/* Only the first address in the returned list is used. */
248 memcpy (&in, *hptr->h_addr_list, sizeof (in));
249 inet_s = inet_ntoa (in);
/* inet_ntoa() may return a pointer to static storage;
   add_host_to_cache() stores its own copy of the text. */
251 add_host_to_cache (host, inet_s);
253 /* add_host_to_cache() can establish a slave-master mapping. */
254 DEBUGP (("Checking again for %s in host_slave_master_map.\n", host));
255 master_name = hash_table_get (host_slave_master_map, host);
260 return xstrdup_lower (host);
263 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
264 taking care of aliases. It uses realhost() to determine a unique
265 hostname for each of two hosts. If opt.simple_check is non-zero,
266 only strcasecmp() is used for comparison. */
268 same_host (const char *u1, const char *u2)
274 /* Skip protocol, if present. */
277 u1 += skip_proto (u1);
278 u2 += skip_proto (u2);
280 /* Skip username and password, if present. */
281 u1 += skip_uname (u1);
282 u2 += skip_uname (u2);
/* The host part runs up to the first '/' or ':' (or the end of the
   string); it is copied out with strdupdelim(). */
284 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
285 p1 = strdupdelim (s, u1);
286 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
287 p2 = strdupdelim (s, u2);
288 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
/* First a plain case-insensitive comparison of the two host parts. */
289 if (strcasecmp (p1, p2) == 0)
293 DEBUGP (("They are quite alike.\n"));
296 else if (opt.simple_check)
300 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
/* The textual comparison was inconclusive; compare the names that
   realhost() reports for each host instead. */
303 real1 = realhost (p1);
304 real2 = realhost (p2);
307 if (strcasecmp (real1, real2) == 0)
309 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
316 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
323 /* Determine whether a URL is acceptable to be followed, according to
324 a list of domains to accept (opt.domains) and, when it is set, the
   opt.exclude_domains list. Both lists are matched against U->host
   with sufmatch(); U->host must not be NULL. */
326 accept_domain (struct urlinfo *u)
328 assert (u->host != NULL);
331 if (!sufmatch ((const char **)opt.domains, u->host))
334 if (opt.exclude_domains)
336 if (sufmatch ((const char **)opt.exclude_domains, u->host))
342 /* Check whether WHAT is matched in LIST, each element of LIST being a
343 pattern to match WHAT against, using backward matching (see
344 match_backwards() in utils.c). LIST is scanned up to its NULL
   entry, and characters are compared through TOLOWER.
346 If an element of LIST matched, 1 is returned, 0 otherwise. */
348 sufmatch (const char **list, const char *what)
353 for (i = 0; list[i]; i++)
/* Walk both strings from their ends towards the front. J starts at
   strlen (list[i]), i.e. on the pattern's terminating NUL; K starts
   at LW (presumably the length of WHAT -- TODO confirm). */
355 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
356 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
358 /* The domain must be the first to reach its beginning. */
/* NOTE(review): a pure character-wise suffix comparison would also
   accept a host such as "notexample.com" for the pattern
   "example.com"; confirm whether a label-boundary check is wanted. */
365 /* Return email address of the form username@FQDN suitable for
366 anonymous FTP passwords. This process is error-prone, and the
367 escape hatch is the MY_HOST preprocessor constant, which can be
368 used to hard-code either your hostname or FQDN at compile-time.
370 If the FQDN cannot be determined, a warning is printed, and the
371 function returns a short `username@' form, accepted by most
374 The returned string is generated by malloc() and should be freed
377 If not even the username can be determined, it means things are
378 seriously wrong, and Wget exits. */
380 ftp_getaddress (void)
382 static char *address;
384 /* Do the drill only the first time, as it won't change. */
387 char userid[32]; /* 9 should be enough for Unix, but
388 I'd rather be on the safe side. */
/* NOTE(review): pwd_cuserid() is given no buffer size; confirm that it
   can never write more than sizeof (userid) bytes. */
391 if (!pwd_cuserid (userid))
393 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
398 STRDUP_ALLOCA (host, MY_HOST);
399 #else /* not MY_HOST */
403 if (uname (&ubuf) < 0)
405 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
406 exec_name, strerror (errno));
410 STRDUP_ALLOCA (host, ubuf.nodename);
412 #else /* not HAVE_UNAME */
413 #ifdef HAVE_GETHOSTNAME
/* NOTE(review): gethostname() is not required to NUL-terminate a
   truncated name; confirm HOST is terminated before it is searched
   with strchr(). */
415 if (gethostname (host, 256) < 0)
417 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
422 #else /* not HAVE_GETHOSTNAME */
423 #error Cannot determine host name.
424 #endif /* not HAVE_GETHOSTNAME */
425 #endif /* not HAVE_UNAME */
426 #endif /* not MY_HOST */
427 /* If the address we got so far contains a period, don't bother
429 if (strchr (host, '.'))
433 /* #### I've seen the following scheme fail on at least one
434 system! Do we care? */
436 /* According to Richard Stevens, the correct way to find the
437 FQDN is to (1) find the host name, (2) find its IP
438 address using gethostbyname(), and (3) get the FQDN using
439 gethostbyaddr(). So that's what we'll do. Step one has
442 struct hostent *hp = gethostbyname (host);
443 if (!hp || !hp->h_addr_list)
445 logprintf (LOG_ALWAYS, _("\
446 %s: Warning: cannot determine local IP address.\n"),
451 /* Copy the argument, so the call to gethostbyaddr doesn't
452 clobber it -- just in case. */
453 tmpstore = (char *)alloca (hp->h_length);
454 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
/* HP is reassigned by the reverse lookup; the length and address type
   arguments are read from the forward-lookup result in the same
   call. */
456 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
457 if (!hp || !hp->h_name)
459 logprintf (LOG_ALWAYS, _("\
460 %s: Warning: cannot reverse-lookup local IP address.\n"),
465 if (!strchr (hp->h_name, '.'))
468 /* This gets ticked pretty often. Karl Berry reports
469 that there can be valid reasons for the local host
470 name not to be an FQDN, so I've decided to remove the
472 logprintf (LOG_ALWAYS, _("\
473 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
479 /* Once we're here, hp->h_name contains the correct FQDN. */
480 STRDUP_ALLOCA (fqdn, hp->h_name);
/* Room for USERID, the '@', FQDN and the terminating NUL. */
483 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
484 sprintf (address, "%s@%s", userid, fqdn);
489 /* Print error messages for host errors. */
493 /* Can't use switch since some constants are equal (at least on my
494 system), and the compiler signals "duplicate case value". */
495 if (error == HOST_NOT_FOUND
496 || error == NO_RECOVERY
498 || error == NO_ADDRESS
499 || error == TRY_AGAIN)
500 return _("Host not found");
502 return _("Unknown error");
508 /* host_name_address_map and host_address_name_map share the
509 strings. Because of that, calling free_keys_and_values once
510 suffices for both. */
511 free_keys_and_values (host_name_address_map);
512 hash_table_destroy (host_name_address_map);
513 hash_table_destroy (host_address_name_map);
514 free_keys_and_values (host_slave_master_map);
515 hash_table_destroy (host_slave_master_map);
521 host_name_address_map = make_string_hash_table (0);
522 host_address_name_map = make_string_hash_table (0);
523 host_slave_master_map = make_string_hash_table (0);