1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
38 # include <arpa/inet.h>
42 #ifdef HAVE_SYS_UTSNAME_H
43 # include <sys/utsname.h>
59 /* Host's symbolical name, as encountered at the time of first
60 inclusion, e.g. "fly.cc.fer.hr". */
62 /* Host's "real" name, i.e. its IP address, written out in ASCII
63 form of N.N.N.N, e.g. "161.53.70.130". */
65 /* More than one HOSTNAME can correspond to the same REALNAME. For
66 our purposes, the canonical name of the host is its HOSTNAME when
67 it was first encountered. This entry is said to have QUALITY. */
69 /* Next entry in the list. */
73 static struct host *hlist;
75 static struct host *add_hlist PARAMS ((struct host *, const char *,
78 /* The same as gethostbyname, but supports internet addresses of the
81 ngethostbyname (const char *name)
86 addr = (unsigned long)inet_addr (name);
88 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
90 hp = gethostbyname (name);
94 /* Search for HOST in the linked list L, by hostname. Return the
95 entry, if found, or NULL. The search is case-insensitive. */
97 search_host (struct host *l, const char *host)
99 for (; l; l = l->next)
100 if (strcasecmp (l->hostname, host) == 0)
105 /* Like search_host, but searches by address. */
107 search_address (struct host *l, const char *address)
109 for (; l; l = l->next)
111 int cmp = strcmp (l->realname, address);
120 /* Store the address of HOSTNAME, internet-style, to WHERE. A numeric
121 address is converted directly; otherwise the host list is checked
122 and, if the name is not found there, gethostbyname is used.
124 Return 1 on successful finding of the hostname, 0 otherwise. */
126 store_hostaddress (unsigned char *where, const char *hostname)
130 struct hostent *hptr;
134 /* If the address is of the form d.d.d.d, there will be no trouble
136 addr = (unsigned long)inet_addr (hostname);
139 /* If it is not of that form, try to find it in the cache. */
140 t = search_host (hlist, hostname);
142 addr = (unsigned long)inet_addr (t->realname);
144 /* If we have the numeric address, just store it. */
147 /* ADDR is in network byte order, meaning the code works on
148 little and big endian 32-bit architectures without change.
149 On big endian 64-bit architectures we need to be careful to
150 copy the correct four bytes. */
152 #ifdef WORDS_BIGENDIAN
153 offset = sizeof (unsigned long) - 4;
155 memcpy (where, (char *)&addr + offset, 4);
158 /* Since all else has failed, let's try gethostbyname(). Note that
159 we use gethostbyname() rather than ngethostbyname(), because we
160 *know* the address is not numerical. */
161 hptr = gethostbyname (hostname);
164 /* Copy the address of the host to socket description. */
165 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
166 /* Now that we're here, we could as well cache the hostname for
167 future use, as in realhost(). First, we have to look for it by
168 address to know if it's already in the cache by another name. */
170 /* Originally, we copied to in.s_addr, but it appears to be missing
172 memcpy (&in, *hptr->h_addr_list, sizeof (in));
173 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
174 t = search_address (hlist, inet_s);
175 if (t) /* Found in the list, as realname. */
177 /* Set the default, 0 quality. */
178 hlist = add_hlist (hlist, hostname, inet_s, 0);
181 /* Since this is really the first time this host is encountered,
183 hlist = add_hlist (hlist, hostname, inet_s, 1);
187 /* Add a host to the host list. The list is sorted by addresses. For
188 equal addresses, the entries with quality should bubble towards the
189 beginning of the list. */
191 add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
193 struct host *t, *old, *beg;
195 /* The entry goes to the beginning of the list if the list is empty
196 or the order requires it. */
197 if (!l || (strcmp (nreal, l->realname) < 0))
199 t = (struct host *)xmalloc (sizeof (struct host));
200 t->hostname = xstrdup (nhost);
201 t->realname = xstrdup (nreal);
202 t->quality = quality;
208 /* Second to one-before-the-last element. */
214 cmp = strcmp (nreal, l->realname);
217 /* If the next list element is greater than s, put s between the
218 current and the next list element. */
219 t = (struct host *)xmalloc (sizeof (struct host));
222 t->hostname = xstrdup (nhost);
223 t->realname = xstrdup (nreal);
224 t->quality = quality;
227 t = (struct host *)xmalloc (sizeof (struct host));
228 t->hostname = xstrdup (nhost);
229 t->realname = xstrdup (nreal);
230 t->quality = quality;
231 /* Insert the new element after the last element. */
237 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
238 is referenced by more than one name, "real" name is considered to
239 be the first one encountered in the past.
241 If the host cannot be found in the list of already dealt-with
242 hosts, try with its INET address. If this fails too, add it to the
243 list. The routine does not call gethostbyname twice for the same
244 host if it can possibly avoid it. */
246 realhost (const char *host)
250 struct hostent *hptr;
253 DEBUGP (("Checking for %s.\n", host));
254 /* Look for the host, looking by the host name. */
255 l = search_host (hlist, host);
256 if (l && l->quality) /* Found it with quality */
258 DEBUGP (("%s was already used, by that name.\n", host));
259 /* Here we return l->hostname, not host, because of the possible
260 case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
261 the same, but we want the one that was first). */
262 return xstrdup (l->hostname);
264 else if (!l) /* Not found, with or without quality */
266 /* The fact that gethostbyname will get called makes it
267 necessary to store it to the list, to ensure that
268 gethostbyname will not be called twice for the same string.
269 However, the quality argument must be set appropriately.
271 Note that add_hlist must be called *after* the realname
272 search, or the quality would be always set to 0 */
273 DEBUGP (("This is the first time I hear about host %s by that name.\n",
275 hptr = ngethostbyname (host);
277 return xstrdup (host);
278 /* Originally, we copied to in.s_addr, but it appears to be
279 missing on some systems. */
280 memcpy (&in, *hptr->h_addr_list, sizeof (in));
281 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
283 else /* Found, without quality */
285 /* This case happens when host is on the list,
286 but not as first entry (the one with quality).
287 Then we just get its INET address and pick
288 up the first entry with quality. */
289 DEBUGP (("We've dealt with host %s, but under the name %s.\n",
291 STRDUP_ALLOCA (inet_s, l->realname);
294 /* Now we certainly have the INET address. The following search is
295 guaranteed to pick either an entry with quality (because it is
296 the first one), or none at all. */
297 l = search_address (hlist, inet_s);
298 if (l) /* Found in the list, as realname. */
300 /* Set the default, 0 quality. */
301 hlist = add_hlist (hlist, host, inet_s, 0);
302 return xstrdup (l->hostname);
304 /* Since this is really the first time this host is encountered,
306 hlist = add_hlist (hlist, host, inet_s, 1);
307 return xstrdup (host);
310 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
311 taking care of aliases. It uses realhost() to determine a unique
312 hostname for each of two hosts. If opt.simple_check is non-zero,
313 only strcasecmp() is used for comparison. */
315 same_host (const char *u1, const char *u2)
321 /* Skip protocol, if present. */
324 u1 += skip_proto (u1);
325 u2 += skip_proto (u2);
327 /* Skip username and password, if present. */
328 u1 += skip_uname (u1);
329 u2 += skip_uname (u2);
331 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
332 p1 = strdupdelim (s, u1);
333 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
334 p2 = strdupdelim (s, u2);
335 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
336 if (strcasecmp (p1, p2) == 0)
340 DEBUGP (("They are quite alike.\n"));
343 else if (opt.simple_check)
347 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
350 real1 = realhost (p1);
351 real2 = realhost (p2);
354 if (strcasecmp (real1, real2) == 0)
356 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
363 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
370 /* Determine whether a URL is acceptable to be followed, according to
371 a list of domains to accept. */
373 accept_domain (struct urlinfo *u)
375 assert (u->host != NULL);
378 if (!sufmatch ((const char **)opt.domains, u->host))
381 if (opt.exclude_domains)
383 if (sufmatch ((const char **)opt.exclude_domains, u->host))
389 /* Check whether WHAT is matched in LIST, each element of LIST being a
390 pattern to match WHAT against, using backward matching (see
391 match_backwards() in utils.c).
393 If an element of LIST matched, 1 is returned, 0 otherwise. */
395 sufmatch (const char **list, const char *what)
400 for (i = 0; list[i]; i++)
402 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
403 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
405 /* The domain must be first to reach to beginning. */
412 /* Return email address of the form username@FQDN suitable for
413 anonymous FTP passwords. This process is error-prone, and the
414 escape hatch is the MY_HOST preprocessor constant, which can be
415 used to hard-code either your hostname or FQDN at compile-time.
417 If the FQDN cannot be determined, a warning is printed, and the
418 function returns a short `username@' form, accepted by most
421 If not even the username can be divined, it means things are
422 seriously broken, and Wget exits. */
424 ftp_getaddress (void)
426 static char *address;
428 /* Do the drill only the first time, as it won't change. */
431 char userid[32]; /* 9 should be enough for Unix, but
432 I'd rather be on the safe side. */
435 if (!pwd_cuserid (userid))
437 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
442 STRDUP_ALLOCA (host, MY_HOST);
443 #else /* not MY_HOST */
447 if (uname (&ubuf) < 0)
449 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
450 exec_name, strerror (errno));
454 STRDUP_ALLOCA (host, ubuf.nodename);
456 #else /* not HAVE_UNAME */
457 #ifdef HAVE_GETHOSTNAME
459 if (gethostname (host, 256) < 0)
461 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
466 #else /* not HAVE_GETHOSTNAME */
467 #error Cannot determine host name.
468 #endif /* not HAVE_GETHOSTNAME */
469 #endif /* not HAVE_UNAME */
470 #endif /* not MY_HOST */
471 /* If the address we got so far contains a period, don't bother
473 if (strchr (host, '.'))
477 /* #### I've seen the following scheme fail on at least one
478 system! Do we care? */
480 /* According to Richard Stevens, the correct way to find the
481 FQDN is to (1) find the host name, (2) find its IP
482 address using gethostbyname(), and (3) get the FQDN using
483 gethostbyaddr(). So that's what we'll do. Step one has
486 struct hostent *hp = gethostbyname (host);
487 if (!hp || !hp->h_addr_list)
489 logprintf (LOG_ALWAYS, _("\
490 %s: Warning: cannot determine local IP address.\n"),
495 /* Copy the argument, so the call to gethostbyaddr doesn't
496 clobber it -- just in case. */
497 tmpstore = (char *)alloca (hp->h_length);
498 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
500 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
501 if (!hp || !hp->h_name)
503 logprintf (LOG_ALWAYS, _("\
504 %s: Warning: cannot reverse-lookup local IP address.\n"),
509 if (!strchr (hp->h_name, '.'))
512 /* This gets ticked pretty often. Karl Berry reports
513 that there can be valid reasons for the local host
514 name not to be an FQDN, so I've decided to remove the
516 logprintf (LOG_ALWAYS, _("\
517 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
523 /* Once we're here, hp->h_name contains the correct FQDN. */
524 STRDUP_ALLOCA (fqdn, hp->h_name);
527 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
528 sprintf (address, "%s@%s", userid, fqdn);
533 /* Print error messages for host errors. */
537 /* Can't use switch since some constants are equal (at least on my
538 system), and the compiler signals "duplicate case value". */
539 if (error == HOST_NOT_FOUND
540 || error == NO_RECOVERY
542 || error == NO_ADDRESS
543 || error == TRY_AGAIN)
544 return _("Host not found");
546 return _("Unknown error");
549 /* Clean the host list. This is a separate function, so we needn't
550 export HLIST and its implementation. Ha! */
554 struct host *l = hlist;
558 struct host *p = l->next;