/* Dealing with host names.
- Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
-This program is distributed in the hope that it will be useful,
+GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
+along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
-#include <ctype.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
#else
# include <sys/socket.h>
# include <netinet/in.h>
+#ifndef __BEOS__
# include <arpa/inet.h>
+#endif
# include <netdb.h>
#endif /* WINDOWS */
+#ifndef NO_ADDRESS
+#define NO_ADDRESS NO_DATA
+#endif
+
#ifdef HAVE_SYS_UTSNAME_H
# include <sys/utsname.h>
#endif
#include "utils.h"
#include "host.h"
#include "url.h"
+#include "hash.h"
#ifndef errno
extern int errno;
#endif
-/* Host list entry */
-struct host
+/* Mapping from all known hosts to their addresses (n.n.n.n). */
+struct hash_table *host_name_address_map;
+
+/* Mapping from all known addresses (n.n.n.n) to their hosts. This
+ is the inverse of host_name_address_map. These two tables share
+ the strdup'ed strings. */
+struct hash_table *host_address_name_map;
+
+/* Mapping between auxiliary (slave) and master host names. */
+struct hash_table *host_slave_master_map;
+
+/* Utility function: like xstrdup(), but also lowercases S. */
+
+static char *
+xstrdup_lower (const char *s)
{
- /* Host's symbolical name, as encountered at the time of first
- inclusion, e.g. "fly.cc.fer.hr". */
- char *hostname;
- /* Host's "real" name, i.e. its IP address, written out in ASCII
- form of N.N.N.N, e.g. "161.53.70.130". */
- char *realname;
- /* More than one HOSTNAME can correspond to the same REALNAME. For
- our purposes, the canonical name of the host is its HOSTNAME when
- it was first encountered. This entry is said to have QUALITY. */
- int quality;
- /* Next entry in the list. */
- struct host *next;
-};
-
-static struct host *hlist;
-
-static struct host *add_hlist PARAMS ((struct host *, const char *,
- const char *, int));
+ char *copy = xstrdup (s);
+ char *p = copy;
+ for (; *p; p++)
+ *p = TOLOWER (*p);
+ return copy;
+}
/* The same as gethostbyname, but supports internet addresses of the
- form `N.N.N.N'. */
+ form `N.N.N.N'. On some systems gethostbyname() knows how to do
+ this automatically. */
struct hostent *
ngethostbyname (const char *name)
{
return hp;
}
-/* Search for HOST in the linked list L, by hostname. Return the
- entry, if found, or NULL. The search is case-insensitive. */
-static struct host *
-search_host (struct host *l, const char *host)
-{
- for (; l; l = l->next)
- if (strcasecmp (l->hostname, host) == 0)
- return l;
- return NULL;
-}
+/* Add host name HOST with the address ADDR_TEXT to the cache.
+ Normally this means that the (HOST, ADDR_TEXT) pair will be added to
+ host_name_address_map and to host_address_name_map. (It is the
+ caller's responsibility to make sure that HOST is not already in
+ host_name_address_map.)
-/* Like search_host, but searches by address. */
-static struct host *
-search_address (struct host *l, const char *address)
+ If the ADDR_TEXT has already been seen and belongs to another host,
+ HOST will be added to host_slave_master_map instead. */
+
+static void
+add_host_to_cache (const char *host, const char *addr_text)
{
- for (; l; l = l->next)
+ char *canonical_name = hash_table_get (host_address_name_map, addr_text);
+ if (canonical_name)
+ {
+ DEBUGP (("Mapping %s to %s in host_slave_master_map.\n",
+ host, canonical_name));
+ /* We've already dealt with that host under another name. */
+ hash_table_put (host_slave_master_map,
+ xstrdup_lower (host),
+ xstrdup_lower (canonical_name));
+ }
+ else
{
- int cmp = strcmp (l->realname, address);
- if (cmp == 0)
- return l;
- else if (cmp > 0)
- return NULL;
+ /* This is really the first time we're dealing with that host. */
+ char *h_copy = xstrdup_lower (host);
+ char *a_copy = xstrdup (addr_text);
+ DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy));
+ hash_table_put (host_name_address_map, h_copy, a_copy);
+ hash_table_put (host_address_name_map, a_copy, h_copy);
}
- return NULL;
}
-/* Store the address of HOSTNAME, internet-style, to WHERE. First
- check for it in the host list, and (if not found), use
- ngethostbyname to get it.
+/* Store the address of HOSTNAME, internet-style (four octets in
+ network order), to WHERE. First try to get the address from the
+ cache; if it is not available, call the DNS functions and update
+ the cache.
Return 1 on successful finding of the hostname, 0 otherwise. */
int
store_hostaddress (unsigned char *where, const char *hostname)
{
- struct host *t;
unsigned long addr;
+ char *addr_text;
+ char *canonical_name;
struct hostent *hptr;
struct in_addr in;
char *inet_s;
/* If the address is of the form d.d.d.d, there will be no trouble
with it. */
addr = (unsigned long)inet_addr (hostname);
- if ((int)addr == -1)
- {
- /* If it is not of that form, try to find it in the cache. */
- t = search_host (hlist, hostname);
- if (t)
- addr = (unsigned long)inet_addr (t->realname);
- }
/* If we have the numeric address, just store it. */
if ((int)addr != -1)
{
- /* This works on both little and big endian architecture, as
- inet_addr returns the address in the proper order. */
+ /* ADDR is defined to be in network byte order, meaning the code
+ works on little and big endian 32-bit architectures without
+ change. On big endian 64-bit architectures we need to be
+ careful to copy the correct four bytes. */
+ int offset;
+ have_addr:
#ifdef WORDS_BIGENDIAN
- if (sizeof (addr) == 8)
- {
- /* We put the shift amount in a variable because it quiets gcc -Wall's
- warning on 32-bit-address systems: "warning: left shift count >=
- width of type". The optimizer should constant-fold away this
- variable (you'd think the warning would come back with maximum
- optimization turned on, but it doesn't, on gcc 2.8.1, at least).
- Not sure if there's a cleaner way to get rid of the warning -- can
- this code be surrounded by an #ifdef that's never active on 32-bit
- systems? Is there no way to check at configure-time whether we'll
- ever potentially encounter a 64-bit address? */
- int shift_amount = 32;
-
- addr <<= shift_amount;
- }
+ offset = sizeof (unsigned long) - 4;
+#else
+ offset = 0;
#endif
- memcpy (where, &addr, 4);
+ memcpy (where, (char *)&addr + offset, 4);
return 1;
}
+
+ /* By now we know that the address is not of the form d.d.d.d. Try
+ to find it in our cache of host addresses. */
+ addr_text = hash_table_get (host_name_address_map, hostname);
+ if (addr_text)
+ {
+ DEBUGP (("Found %s in host_name_address_map: %s\n",
+ hostname, addr_text));
+ addr = (unsigned long)inet_addr (addr_text);
+ goto have_addr;
+ }
+
+ /* Maybe this host is known to us under another name. If so, we'll
+ find it in host_slave_master_map, and use the master name to find
+ its address in host_name_address_map. */
+ canonical_name = hash_table_get (host_slave_master_map, hostname);
+ if (canonical_name)
+ {
+ addr_text = hash_table_get (host_name_address_map, canonical_name);
+ assert (addr_text != NULL);
+ DEBUGP (("Found %s as slave of %s -> %s\n",
+ hostname, canonical_name, addr_text));
+ addr = (unsigned long)inet_addr (addr_text);
+ goto have_addr;
+ }
+
/* Since all else has failed, let's try gethostbyname(). Note that
we use gethostbyname() rather than ngethostbyname(), because we
- *know* the address is not numerical. */
+ already know that the address is not numerical. */
hptr = gethostbyname (hostname);
if (!hptr)
return 0;
/* Copy the address of the host to socket description. */
memcpy (where, hptr->h_addr_list[0], hptr->h_length);
- /* Now that we're here, we could as well cache the hostname for
- future use, as in realhost(). First, we have to look for it by
- address to know if it's already in the cache by another name. */
+ assert (hptr->h_length == 4);
+ /* Now that we've gone through the trouble of calling
+ gethostbyname(), we can store this valuable information to the
+ cache. First, we have to look for it by address to know if it's
+ already in the cache by another name. */
/* Originally, we copied to in.s_addr, but it appears to be missing
on some systems. */
memcpy (&in, *hptr->h_addr_list, sizeof (in));
- STRDUP_ALLOCA (inet_s, inet_ntoa (in));
- t = search_address (hlist, inet_s);
- if (t) /* Found in the list, as realname. */
- {
- /* Set the default, 0 quality. */
- hlist = add_hlist (hlist, hostname, inet_s, 0);
- return 1;
- }
- /* Since this is really the first time this host is encountered,
- set quality to 1. */
- hlist = add_hlist (hlist, hostname, inet_s, 1);
+ inet_s = inet_ntoa (in);
+ add_host_to_cache (hostname, inet_s);
return 1;
}
-/* Add a host to the host list. The list is sorted by addresses. For
- equal addresses, the entries with quality should bubble towards the
- beginning of the list. */
-static struct host *
-add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
-{
- struct host *t, *old, *beg;
-
- /* The entry goes to the beginning of the list if the list is empty
- or the order requires it. */
- if (!l || (strcmp (nreal, l->realname) < 0))
- {
- t = (struct host *)xmalloc (sizeof (struct host));
- t->hostname = xstrdup (nhost);
- t->realname = xstrdup (nreal);
- t->quality = quality;
- t->next = l;
- return t;
- }
-
- beg = l;
- /* Second two one-before-the-last element. */
- while (l->next)
- {
- int cmp;
- old = l;
- l = l->next;
- cmp = strcmp (nreal, l->realname);
- if (cmp >= 0)
- continue;
- /* If the next list element is greater than s, put s between the
- current and the next list element. */
- t = (struct host *)xmalloc (sizeof (struct host));
- old->next = t;
- t->next = l;
- t->hostname = xstrdup (nhost);
- t->realname = xstrdup (nreal);
- t->quality = quality;
- return beg;
- }
- t = (struct host *)xmalloc (sizeof (struct host));
- t->hostname = xstrdup (nhost);
- t->realname = xstrdup (nreal);
- t->quality = quality;
- /* Insert the new element after the last element. */
- l->next = t;
- t->next = NULL;
- return beg;
-}
-
/* Determine the "real" name of HOST, as perceived by Wget. If HOST
is referenced by more than one name, "real" name is considered to
- be the first one encountered in the past.
-
- If the host cannot be found in the list of already dealt-with
- hosts, try with its INET address. If this fails too, add it to the
- list. The routine does not call gethostbyname twice for the same
- host if it can possibly avoid it. */
+ be the first one encountered in the past. */
char *
realhost (const char *host)
{
- struct host *l;
struct in_addr in;
struct hostent *hptr;
- char *inet_s;
+ char *master_name;
- DEBUGP (("Checking for %s.\n", host));
- /* Look for the host, looking by the host name. */
- l = search_host (hlist, host);
- if (l && l->quality) /* Found it with quality */
- {
- DEBUGP (("%s was already used, by that name.\n", host));
- /* Here we return l->hostname, not host, because of the possible
- case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
- the same, but we want the one that was first. */
- return xstrdup (l->hostname);
- }
- else if (!l) /* Not found, with or without quality */
+ DEBUGP (("Checking for %s in host_name_address_map.\n", host));
+ if (hash_table_contains (host_name_address_map, host))
{
- /* The fact that gethostbyname will get called makes it
- necessary to store it to the list, to ensure that
- gethostbyname will not be called twice for the same string.
- However, the quality argument must be set appropriately.
-
- Note that add_hlist must be called *after* the realname
- search, or the quality would be always set to 0 */
- DEBUGP (("This is the first time I hear about host %s by that name.\n",
- host));
- hptr = ngethostbyname (host);
- if (!hptr)
- return xstrdup (host);
- /* Originally, we copied to in.s_addr, but it appears to be
- missing on some systems. */
- memcpy (&in, *hptr->h_addr_list, sizeof (in));
- STRDUP_ALLOCA (inet_s, inet_ntoa (in));
+ DEBUGP (("Found; %s was already used, by that name.\n", host));
+ return xstrdup_lower (host);
}
- else /* Found, without quality */
+
+ DEBUGP (("Checking for %s in host_slave_master_map.\n", host));
+ master_name = hash_table_get (host_slave_master_map, host);
+ if (master_name)
{
- /* This case happens when host is on the list,
- but not as first entry (the one with quality).
- Then we just get its INET address and pick
- up the first entry with quality. */
- DEBUGP (("We've dealt with host %s, but under the name %s.\n",
- host, l->realname));
- STRDUP_ALLOCA (inet_s, l->realname);
+ has_master:
+ DEBUGP (("Found; %s was already used, by the name %s.\n",
+ host, master_name));
+ return xstrdup (master_name);
}
- /* Now we certainly have the INET address. The following loop is
- guaranteed to pick either an entry with quality (because it is
- the first one), or none at all. */
- l = search_address (hlist, inet_s);
- if (l) /* Found in the list, as realname. */
+ DEBUGP (("First time I hear about %s by that name; looking it up.\n",
+ host));
+ hptr = ngethostbyname (host);
+ if (hptr)
{
- /* Set the default, 0 quality. */
- hlist = add_hlist (hlist, host, inet_s, 0);
- return xstrdup (l->hostname);
+ char *inet_s;
+ /* Originally, we copied to in.s_addr, but it appears to be
+ missing on some systems. */
+ memcpy (&in, *hptr->h_addr_list, sizeof (in));
+ inet_s = inet_ntoa (in);
+
+ add_host_to_cache (host, inet_s);
+
+ /* add_host_to_cache() can establish a slave-master mapping. */
+ DEBUGP (("Checking again for %s in host_slave_master_map.\n", host));
+ master_name = hash_table_get (host_slave_master_map, host);
+ if (master_name)
+ goto has_master;
}
- /* Since this is really the first time this host is encountered,
- set quality to 1. */
- hlist = add_hlist (hlist, host, inet_s, 1);
- return xstrdup (host);
+
+ return xstrdup_lower (host);
}
/* Compare two hostnames (out of URL-s if the arguments are URL-s),
char *real1, *real2;
/* Skip protocol, if present. */
- u1 += skip_url (u1);
- u2 += skip_url (u2);
- u1 += skip_proto (u1);
- u2 += skip_proto (u2);
+ u1 += url_skip_scheme (u1);
+ u2 += url_skip_scheme (u2);
/* Skip username ans password, if present. */
- u1 += skip_uname (u1);
- u2 += skip_uname (u2);
+ u1 += url_skip_uname (u1);
+ u2 += url_skip_uname (u2);
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
p1 = strdupdelim (s, u1);
DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
if (strcasecmp (p1, p2) == 0)
{
- free (p1);
- free (p2);
+ xfree (p1);
+ xfree (p2);
DEBUGP (("They are quite alike.\n"));
return 1;
}
else if (opt.simple_check)
{
- free (p1);
- free (p2);
+ xfree (p1);
+ xfree (p2);
DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
return 0;
}
real1 = realhost (p1);
real2 = realhost (p2);
- free (p1);
- free (p2);
+ xfree (p1);
+ xfree (p2);
if (strcasecmp (real1, real2) == 0)
{
DEBUGP (("They are alike, after realhost()->%s.\n", real1));
- free (real1);
- free (real2);
+ xfree (real1);
+ xfree (real2);
return 1;
}
else
{
DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
- free (real1);
- free (real2);
+ xfree (real1);
+ xfree (real2);
return 0;
}
}
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */
int
-accept_domain (struct urlinfo *u)
+accept_domain (struct url *u)
{
assert (u->host != NULL);
if (opt.domains)
return 0;
}
-/* Return email address of the form username@FQDN suitable for
- anonymous FTP passwords. This process is error-prone, and the
- escape hatch is the MY_HOST preprocessor constant, which can be
- used to hard-code either your hostname or FQDN at compile-time.
-
- If the FQDN cannot be determined, a warning is printed, and the
- function returns a short `username@' form, accepted by most
- anonymous servers.
-
- If not even the username cannot be divined, it means things are
- seriously fucked up, and Wget exits. */
-char *
-ftp_getaddress (void)
-{
- static char *address;
-
- /* Do the drill only the first time, as it won't change. */
- if (!address)
- {
- char userid[32]; /* 9 should be enough for Unix, but
- I'd rather be on the safe side. */
- char *host, *fqdn;
-
- if (!pwd_cuserid (userid))
- {
- logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
- exec_name);
- exit (1);
- }
-#ifdef MY_HOST
- STRDUP_ALLOCA (host, MY_HOST);
-#else /* not MY_HOST */
-#ifdef HAVE_UNAME
- {
- struct utsname ubuf;
- if (uname (&ubuf) < 0)
- {
- logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
- exec_name, strerror (errno));
- fqdn = "";
- goto giveup;
- }
- STRDUP_ALLOCA (host, ubuf.nodename);
- }
-#else /* not HAVE_UNAME */
-#ifdef HAVE_GETHOSTNAME
- host = alloca (256);
- if (gethostname (host, 256) < 0)
- {
- logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
- exec_name);
- fqdn = "";
- goto giveup;
- }
-#else /* not HAVE_GETHOSTNAME */
- #error Cannot determine host name.
-#endif /* not HAVE_GETHOSTNAME */
-#endif /* not HAVE_UNAME */
-#endif /* not MY_HOST */
- /* If the address we got so far contains a period, don't bother
- anymore. */
- if (strchr (host, '.'))
- fqdn = host;
- else
- {
- /* #### I've seen the following scheme fail on at least one
- system! Do we care? */
- char *tmpstore;
- /* According to Richard Stevens, the correct way to find the
- FQDN is to (1) find the host name, (2) find its IP
- address using gethostbyname(), and (3) get the FQDN using
- gethostbyaddr(). So that's what we'll do. Step one has
- been done above. */
- /* (2) */
- struct hostent *hp = gethostbyname (host);
- if (!hp || !hp->h_addr_list)
- {
- logprintf (LOG_ALWAYS, _("\
-%s: Warning: cannot determine local IP address.\n"),
- exec_name);
- fqdn = "";
- goto giveup;
- }
- /* Copy the argument, so the call to gethostbyaddr doesn't
- clobber it -- just in case. */
- tmpstore = (char *)alloca (hp->h_length);
- memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
- /* (3) */
- hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
- if (!hp || !hp->h_name)
- {
- logprintf (LOG_ALWAYS, _("\
-%s: Warning: cannot reverse-lookup local IP address.\n"),
- exec_name);
- fqdn = "";
- goto giveup;
- }
- if (!strchr (hp->h_name, '.'))
- {
-#if 0
- /* This gets ticked pretty often. Karl Berry reports
- that there can be valid reasons for the local host
- name not to be an FQDN, so I've decided to remove the
- annoying warning. */
- logprintf (LOG_ALWAYS, _("\
-%s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
- exec_name);
-#endif
- fqdn = "";
- goto giveup;
- }
- /* Once we're here, hp->h_name contains the correct FQDN. */
- STRDUP_ALLOCA (fqdn, hp->h_name);
- }
- giveup:
- address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
- sprintf (address, "%s@%s", userid, fqdn);
- }
- return address;
-}
-
/* Print error messages for host errors. */
char *
herrmsg (int error)
return _("Unknown error");
}
-/* Clean the host list. This is a separate function, so we needn't
- export HLIST and its implementation. Ha! */
void
clean_hosts (void)
{
- struct host *l = hlist;
+ /* host_name_address_map and host_address_name_map share the
+ strings. Because of that, calling free_keys_and_values once
+ suffices for both. */
+ free_keys_and_values (host_name_address_map);
+ hash_table_destroy (host_name_address_map);
+ hash_table_destroy (host_address_name_map);
+ free_keys_and_values (host_slave_master_map);
+ hash_table_destroy (host_slave_master_map);
+}
- while (l)
- {
- struct host *p = l->next;
- free (l->hostname);
- free (l->realname);
- free (l);
- l = p;
- }
- hlist = NULL;
+void
+host_init (void)
+{
+ host_name_address_map = make_string_hash_table (0);
+ host_address_name_map = make_string_hash_table (0);
+ host_slave_master_map = make_string_hash_table (0);
}