1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
38 # include <arpa/inet.h>
42 #ifdef HAVE_SYS_UTSNAME_H
43 # include <sys/utsname.h>
59 /* Host's symbolical name, as encountered at the time of first
60 inclusion, e.g. "fly.cc.fer.hr". */
62 /* Host's "real" name, i.e. its IP address, written out in ASCII
63 form of N.N.N.N, e.g. "161.53.70.130". */
65 /* More than one HOSTNAME can correspond to the same REALNAME. For
66 our purposes, the canonical name of the host is its HOSTNAME when
67 it was first encountered. This entry is said to have QUALITY. */
69 /* Next entry in the list. */
73 static struct host *hlist;
75 static struct host *add_hlist PARAMS ((struct host *, const char *,
78 /* The same as gethostbyname, but supports internet addresses of the
81 ngethostbyname (const char *name)
86 addr = (unsigned long)inet_addr (name);
88 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
90 hp = gethostbyname (name);
94 /* Search for HOST in the linked list L, by hostname. Return the
95 entry, if found, or NULL. The search is case-insensitive. */
97 search_host (struct host *l, const char *host)
99 for (; l; l = l->next)
100 if (strcasecmp (l->hostname, host) == 0)
105 /* Like search_host, but searches by address. */
107 search_address (struct host *l, const char *address)
109 for (; l; l = l->next)
111 int cmp = strcmp (l->realname, address);
120 /* Store the address of HOSTNAME, internet-style, to WHERE. First
121 check for it in the host list, and (if not found), use
122 ngethostbyname to get it.
124 Return 1 on successful finding of the hostname, 0 otherwise. */
126 store_hostaddress (unsigned char *where, const char *hostname)
130 struct hostent *hptr;
134 /* If the address is of the form d.d.d.d, there will be no trouble
136 addr = (unsigned long)inet_addr (hostname);
139 /* If it is not of that form, try to find it in the cache. */
140 t = search_host (hlist, hostname);
142 addr = (unsigned long)inet_addr (t->realname);
144 /* If we have the numeric address, just store it. */
147 /* This works on both little and big endian architecture, as
148 inet_addr returns the address in the proper order. */
149 #ifdef WORDS_BIGENDIAN
150 if (sizeof (addr) == 8)
153 memcpy (where, &addr, 4);
156 /* Since all else has failed, let's try gethostbyname(). Note that
157 we use gethostbyname() rather than ngethostbyname(), because we
158 *know* the address is not numerical. */
159 hptr = gethostbyname (hostname);
162 /* Copy the address of the host to socket description. */
163 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
164 /* Now that we're here, we could as well cache the hostname for
165 future use, as in realhost(). First, we have to look for it by
166 address to know if it's already in the cache by another name. */
168 /* Originally, we copied to in.s_addr, but it appears to be missing
170 memcpy (&in, *hptr->h_addr_list, sizeof (in));
171 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
172 t = search_address (hlist, inet_s);
173 if (t) /* Found in the list, as realname. */
175 /* Set the default, 0 quality. */
176 hlist = add_hlist (hlist, hostname, inet_s, 0);
179 /* Since this is really the first time this host is encountered,
181 hlist = add_hlist (hlist, hostname, inet_s, 1);
185 /* Add a host to the host list. The list is sorted by addresses. For
186 equal addresses, the entries with quality should bubble towards the
187 beginning of the list. */
189 add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
191 struct host *t, *old, *beg;
193 /* The entry goes to the beginning of the list if the list is empty
194 or the order requires it. */
195 if (!l || (strcmp (nreal, l->realname) < 0))
197 t = (struct host *)xmalloc (sizeof (struct host));
198 t->hostname = xstrdup (nhost);
199 t->realname = xstrdup (nreal);
200 t->quality = quality;
206 /* Second to one-before-the-last element. */
212 cmp = strcmp (nreal, l->realname);
215 /* If the next list element's address is greater than NREAL, put the
216 new entry between the current and the next list element. */
217 t = (struct host *)xmalloc (sizeof (struct host));
220 t->hostname = xstrdup (nhost);
221 t->realname = xstrdup (nreal);
222 t->quality = quality;
225 t = (struct host *)xmalloc (sizeof (struct host));
226 t->hostname = xstrdup (nhost);
227 t->realname = xstrdup (nreal);
228 t->quality = quality;
229 /* Insert the new element after the last element. */
235 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
236 is referenced by more than one name, "real" name is considered to
237 be the first one encountered in the past.
239 If the host cannot be found in the list of already dealt-with
240 hosts, try with its INET address. If this fails too, add it to the
241 list. The routine does not call gethostbyname twice for the same
242 host if it can possibly avoid it. */
244 realhost (const char *host)
248 struct hostent *hptr;
251 DEBUGP (("Checking for %s.\n", host));
252 /* Look for the host, looking by the host name. */
253 l = search_host (hlist, host);
254 if (l && l->quality) /* Found it with quality */
256 DEBUGP (("%s was already used, by that name.\n", host));
257 /* Here we return l->hostname, not host, because of the possible
258 case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
259 the same, but we want the one that was first). */
260 return xstrdup (l->hostname);
262 else if (!l) /* Not found, with or without quality */
264 /* The fact that gethostbyname will get called makes it
265 necessary to store it to the list, to ensure that
266 gethostbyname will not be called twice for the same string.
267 However, the quality argument must be set appropriately.
269 Note that add_hlist must be called *after* the realname
270 search, or the quality would be always set to 0 */
271 DEBUGP (("This is the first time I hear about host %s by that name.\n",
273 hptr = ngethostbyname (host);
275 return xstrdup (host);
276 /* Originally, we copied to in.s_addr, but it appears to be
277 missing on some systems. */
278 memcpy (&in, *hptr->h_addr_list, sizeof (in));
279 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
281 else /* Found, without quality */
283 /* This case happens when host is on the list,
284 but not as first entry (the one with quality).
285 Then we just get its INET address and pick
286 up the first entry with quality. */
287 DEBUGP (("We've dealt with host %s, but under the name %s.\n",
289 STRDUP_ALLOCA (inet_s, l->realname);
292 /* Now we certainly have the INET address. The following loop is
293 guaranteed to pick either an entry with quality (because it is
294 the first one), or none at all. */
295 l = search_address (hlist, inet_s);
296 if (l) /* Found in the list, as realname. */
298 /* Set the default, 0 quality. */
299 hlist = add_hlist (hlist, host, inet_s, 0);
300 return xstrdup (l->hostname);
302 /* Since this is really the first time this host is encountered,
304 hlist = add_hlist (hlist, host, inet_s, 1);
305 return xstrdup (host);
308 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
309 taking care of aliases. It uses realhost() to determine a unique
310 hostname for each of two hosts. If opt.simple_check is non-zero, only
311 a case-insensitive string comparison (strcasecmp) is used. */
313 same_host (const char *u1, const char *u2)
319 /* Skip protocol, if present. */
322 u1 += skip_proto (u1);
323 u2 += skip_proto (u2);
325 /* Skip username and password, if present. */
326 u1 += skip_uname (u1);
327 u2 += skip_uname (u2);
329 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
330 p1 = strdupdelim (s, u1);
331 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
332 p2 = strdupdelim (s, u2);
333 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
334 if (strcasecmp (p1, p2) == 0)
338 DEBUGP (("They are quite alike.\n"));
341 else if (opt.simple_check)
345 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
348 real1 = realhost (p1);
349 real2 = realhost (p2);
352 if (strcasecmp (real1, real2) == 0)
354 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
361 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
368 /* Determine whether a URL is acceptable to be followed, according to
369 a list of domains to accept. */
371 accept_domain (struct urlinfo *u)
373 assert (u->host != NULL);
376 if (!sufmatch ((const char **)opt.domains, u->host))
379 if (opt.exclude_domains)
381 if (sufmatch ((const char **)opt.exclude_domains, u->host))
387 /* Check whether WHAT is matched in LIST, each element of LIST being a
388 pattern to match WHAT against, using backward matching (see
389 match_backwards() in utils.c).
391 If an element of LIST matched, 1 is returned, 0 otherwise. */
393 sufmatch (const char **list, const char *what)
398 for (i = 0; list[i]; i++)
400 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
401 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
403 /* The domain must be the first to reach its beginning. */
410 /* Return email address of the form username@FQDN suitable for
411 anonymous FTP passwords. This process is error-prone, and the
412 escape hatch is the MY_HOST preprocessor constant, which can be
413 used to hard-code either your hostname or FQDN at compile-time.
415 If the FQDN cannot be determined, a warning is printed, and the
416 function returns a short `username@' form, accepted by most
419 If not even the username can be determined, it means things are
420 seriously broken, and Wget exits. */
422 ftp_getaddress (void)
424 static char *address;
426 /* Do the drill only the first time, as it won't change. */
429 char userid[32]; /* 9 should be enough for Unix, but
430 I'd rather be on the safe side. */
433 if (!pwd_cuserid (userid))
435 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
440 STRDUP_ALLOCA (host, MY_HOST);
441 #else /* not MY_HOST */
445 if (uname (&ubuf) < 0)
447 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
448 exec_name, strerror (errno));
452 STRDUP_ALLOCA (host, ubuf.nodename);
454 #else /* not HAVE_UNAME */
455 #ifdef HAVE_GETHOSTNAME
457 if (gethostname (host, 256) < 0)
459 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
464 #else /* not HAVE_GETHOSTNAME */
465 #error Cannot determine host name.
466 #endif /* not HAVE_GETHOSTNAME */
467 #endif /* not HAVE_UNAME */
468 #endif /* not MY_HOST */
469 /* If the address we got so far contains a period, don't bother
471 if (strchr (host, '.'))
475 /* #### I've seen the following scheme fail on at least one
476 system! Do we care? */
478 /* According to Richard Stevens, the correct way to find the
479 FQDN is to (1) find the host name, (2) find its IP
480 address using gethostbyname(), and (3) get the FQDN using
481 gethostbyaddr(). So that's what we'll do. Step one has
484 struct hostent *hp = gethostbyname (host);
485 if (!hp || !hp->h_addr_list)
487 logprintf (LOG_ALWAYS, _("\
488 %s: Warning: cannot determine local IP address.\n"),
493 /* Copy the argument, so the call to gethostbyaddr doesn't
494 clobber it -- just in case. */
495 tmpstore = (char *)alloca (hp->h_length);
496 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
498 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
499 if (!hp || !hp->h_name)
501 logprintf (LOG_ALWAYS, _("\
502 %s: Warning: cannot reverse-lookup local IP address.\n"),
507 if (!strchr (hp->h_name, '.'))
510 /* This gets ticked pretty often. Karl Berry reports
511 that there can be valid reasons for the local host
512 name not to be an FQDN, so I've decided to remove the
514 logprintf (LOG_ALWAYS, _("\
515 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
521 /* Once we're here, hp->h_name contains the correct FQDN. */
522 STRDUP_ALLOCA (fqdn, hp->h_name);
525 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
526 sprintf (address, "%s@%s", userid, fqdn);
531 /* Print error messages for host errors. */
535 /* Can't use switch since some constants are equal (at least on my
536 system), and the compiler signals "duplicate case value". */
537 if (error == HOST_NOT_FOUND
538 || error == NO_RECOVERY
540 || error == NO_ADDRESS
541 || error == TRY_AGAIN)
542 return _("Host not found");
544 return _("Unknown error");
547 /* Clean the host list. This is a separate function, so we needn't
548 export HLIST and its implementation. Ha! */
552 struct host *l = hlist;
556 struct host *p = l->next;