1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
38 # include <arpa/inet.h>
42 #ifdef HAVE_SYS_UTSNAME_H
43 # include <sys/utsname.h>
59 /* Host's symbolical name, as encountered at the time of first
60 inclusion, e.g. "fly.cc.fer.hr". */
62 /* Host's "real" name, i.e. its IP address, written out in ASCII
63 form of N.N.N.N, e.g. "161.53.70.130". */
65 /* More than one HOSTNAME can correspond to the same REALNAME. For
66 our purposes, the canonical name of the host is its HOSTNAME when
67 it was first encountered. The entry holding that name is said to have QUALITY. */
69 /* Next entry in the list. */
/* Head of the list of hosts seen so far.  It is scanned by search_host
   and search_address, and updated by assigning it the value returned
   by add_hlist.  */
73 static struct host *hlist;
/* Forward declaration: add_hlist is called by store_hostaddress and
   realhost before its definition further down.  (The rest of this
   prototype is not visible in this excerpt.)  */
75 static struct host *add_hlist PARAMS ((struct host *, const char *,
78 /* The same as gethostbyname, but supports internet addresses of the form N.N.N.N. */
/* Resolve NAME with either gethostbyaddr or gethostbyname.  NAME is
   first run through inet_addr; the visible calls suggest that the
   address lookup is used when that conversion succeeds and the name
   lookup otherwise -- the selecting condition is not visible in this
   excerpt, confirm against the full file.  */
81 ngethostbyname (const char *name)
86 addr = (unsigned long)inet_addr (name);
/* NOTE(review): the length passed below is sizeof (addr).  If addr is
   declared unsigned long (as the cast above suggests), that is 8 on
   LP64 targets, while an AF_INET address is 4 bytes -- confirm.  */
88 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
90 hp = gethostbyname (name);
94 /* Search for HOST in the linked list L, by hostname. Return the
95 entry, if found, or NULL. The search is case-insensitive: each
   entry's hostname is compared with strcasecmp, walking the list
   from L through the next pointers. */
97 search_host (struct host *l, const char *host)
99 for (; l; l = l->next)
100 if (strcasecmp (l->hostname, host) == 0)
105 /* Like search_host, but searches by address: each entry's realname
    is compared against ADDRESS with strcmp, i.e. as an exact,
    case-sensitive string. */
107 search_address (struct host *l, const char *address)
109 for (; l; l = l->next)
/* The comparison result is kept rather than tested in place;
   presumably its sign is used to stop early, since add_hlist keeps
   the list ordered by realname -- TODO confirm, the rest of the loop
   body is not visible in this excerpt.  */
111 int cmp = strcmp (l->realname, address);
120 /* Store the address of HOSTNAME, internet-style, to WHERE. First
121 try HOSTNAME as a numeric address (inet_addr), then check for it
122 in the host list, and (if not found) use gethostbyname to get it;
    a name resolved that way is also added to the host list.
124 Return 1 on successful finding of the hostname, 0 otherwise. */
126 store_hostaddress (unsigned char *where, const char *hostname)
130 struct hostent *hptr;
134 /* If the address is of the form d.d.d.d, there will be no trouble
136 addr = (unsigned long)inet_addr (hostname);
139 /* If it is not of that form, try to find it in the cache. */
140 t = search_host (hlist, hostname);
142 addr = (unsigned long)inet_addr (t->realname);
144 /* If we have the numeric address, just store it. */
147 /* This works on both little and big endian architecture, as
148 inet_addr returns the address in the proper order. It
149 appears to work on 64-bit machines too.
    NOTE(review): only the first 4 bytes of addr are copied.  If addr
    is an unsigned long wider than 4 bytes (its declaration is not
    visible here; the casts above suggest unsigned long), those bytes
    hold the address only on little-endian layouts -- confirm on
    big-endian 64-bit targets. */
150 memcpy (where, &addr, 4);
153 /* Since all else has failed, let's try gethostbyname(). Note that
154 we use gethostbyname() rather than ngethostbyname(), because we
155 *know* the address is not numerical. */
156 hptr = gethostbyname (hostname);
159 /* Copy the address of the host to socket description.
    NOTE(review): h_length bytes are written to WHERE; nothing visible
    here checks that against the size of the caller's buffer --
    confirm callers provide at least h_length bytes. */
160 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
161 /* Now that we're here, we could as well cache the hostname for
162 future use, as in realhost(). First, we have to look for it by
163 address to know if it's already in the cache by another name. */
165 /* Originally, we copied to in.s_addr, but it appears to be missing
167 memcpy (&in, *hptr->h_addr_list, sizeof (in));
168 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
169 t = search_address (hlist, inet_s);
170 if (t) /* Found in the list, as realname. */
172 /* Set the default, 0 quality. */
173 hlist = add_hlist (hlist, hostname, inet_s, 0);
176 /* Since this is really the first time this host is encountered,
178 hlist = add_hlist (hlist, hostname, inet_s, 1);
182 /* Add a host to the host list. The list is sorted by addresses. For
183 equal addresses, the entries with quality should bubble towards the
184 beginning of the list.
    L is the current list, NHOST and NREAL the host name and its
    address text, QUALITY the flag stored in the new entry.  The new
    entry stores xstrdup results for NHOST and NREAL rather than the
    caller's pointers.  Callers in this file assign the return value
    back to the list head.  Ordering uses strcmp on the address text. */
186 add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
188 struct host *t, *old, *beg;
190 /* The entry goes to the beginning of the list if the list is empty
191 or the order requires it. */
192 if (!l || (strcmp (nreal, l->realname) < 0))
194 t = (struct host *)xmalloc (sizeof (struct host));
195 t->hostname = xstrdup (nhost);
196 t->realname = xstrdup (nreal);
197 t->quality = quality;
203 /* Second to one-before-the-last element. */
209 cmp = strcmp (nreal, l->realname);
212 /* If the next list element is greater than NREAL, put the new entry
213 between the current and the next list element. */
214 t = (struct host *)xmalloc (sizeof (struct host));
217 t->hostname = xstrdup (nhost);
218 t->realname = xstrdup (nreal);
219 t->quality = quality;
/* Fall-back case: the same allocation and field setup as in the two
   insertion paths above, for an entry that belongs at the tail.  */
222 t = (struct host *)xmalloc (sizeof (struct host));
223 t->hostname = xstrdup (nhost);
224 t->realname = xstrdup (nreal);
225 t->quality = quality;
226 /* Insert the new element after the last element. */
232 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
233 is referenced by more than one name, "real" name is considered to
234 be the first one encountered in the past.
236 If the host cannot be found in the list of already dealt-with
237 hosts, try with its INET address. If this fails too, add it to the
238 list. The routine does not call gethostbyname twice for the same
239 host if it can possibly avoid it.
    Every visible return path hands back an xstrdup result, so the
    caller presumably owns the returned string -- confirm. */
241 realhost (const char *host)
245 struct hostent *hptr;
248 DEBUGP (("Checking for %s.\n", host));
249 /* Look for the host, looking by the host name. */
250 l = search_host (hlist, host);
251 if (l && l->quality) /* Found it with quality */
253 DEBUGP (("%s was already used, by that name.\n", host));
254 /* Here we return l->hostname, not host, because of the possible
255 case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
256 the same, but we want the one that was first). */
257 return xstrdup (l->hostname);
259 else if (!l) /* Not found, with or without quality */
261 /* The fact that gethostbyname will get called makes it
262 necessary to store it to the list, to ensure that
263 gethostbyname will not be called twice for the same string.
264 However, the quality argument must be set appropriately.
266 Note that add_hlist must be called *after* the realname
267 search, or the quality would be always set to 0 */
268 DEBUGP (("This is the first time I hear about host %s by that name.\n",
270 hptr = ngethostbyname (host);
/* Presumably the lookup-failure path (the guarding test is not visible
   in this excerpt): HOST itself is returned, as a fresh copy.  */
272 return xstrdup (host);
273 /* Originally, we copied to in.s_addr, but it appears to be
274 missing on some systems. */
275 memcpy (&in, *hptr->h_addr_list, sizeof (in));
/* inet_ntoa returns a pointer to static storage, so the text is copied
   (STRDUP_ALLOCA presumably makes a stack copy -- confirm) before it
   is used further.  */
276 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
278 else /* Found, without quality */
280 /* This case happens when host is on the list,
281 but not as first entry (the one with quality).
282 Then we just get its INET address and pick
283 up the first entry with quality. */
284 DEBUGP (("We've dealt with host %s, but under the name %s.\n",
286 STRDUP_ALLOCA (inet_s, l->realname);
289 /* Now we certainly have the INET address. The following search is
290 guaranteed to pick either an entry with quality (because it is
291 the first one), or none at all. */
292 l = search_address (hlist, inet_s);
293 if (l) /* Found in the list, as realname. */
295 /* Set the default, 0 quality. */
296 hlist = add_hlist (hlist, host, inet_s, 0);
297 return xstrdup (l->hostname);
299 /* Since this is really the first time this host is encountered,
301 hlist = add_hlist (hlist, host, inet_s, 1);
302 return xstrdup (host);
305 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
306 taking care of aliases. It uses realhost() to determine a unique
307 hostname for each of two hosts. If opt.simple_check is non-zero,
308 only the textual, case-insensitive comparison (strcasecmp) of the
    two host parts is used; the debug message in that branch indicates
    the hosts are then treated as different without calling
    realhost(). */
310 same_host (const char *u1, const char *u2)
316 /* Skip protocol, if present. */
319 u1 += skip_proto (u1);
320 u2 += skip_proto (u2);
322 /* Skip username and password, if present. */
323 u1 += skip_uname (u1);
324 u2 += skip_uname (u2);
/* Advance each pointer to the end of its host part (the first '/',
   ':' or end of string); the empty-bodied loops are intentional.  The
   span from S up to that point is then copied out with strdupdelim.  */
326 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
327 p1 = strdupdelim (s, u1);
328 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
329 p2 = strdupdelim (s, u2);
330 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
331 if (strcasecmp (p1, p2) == 0)
335 DEBUGP (("They are quite alike.\n"));
338 else if (opt.simple_check)
342 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
/* Textual comparison failed and simple checking is off: compare the
   canonical names that realhost reports for the two hosts.  */
345 real1 = realhost (p1);
346 real2 = realhost (p2);
349 if (strcasecmp (real1, real2) == 0)
351 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
358 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
365 /* Determine whether a URL is acceptable to be followed, according to
366 a list of domains to accept. U->host is suffix-matched (sufmatch)
    against opt.domains and, when opt.exclude_domains is set, against
    that list as well. U->host must not be NULL (asserted). The
    values returned on each path are not visible in this excerpt. */
368 accept_domain (struct urlinfo *u)
370 assert (u->host != NULL);
373 if (!sufmatch ((const char **)opt.domains, u->host))
376 if (opt.exclude_domains)
378 if (sufmatch ((const char **)opt.exclude_domains, u->host))
/* Check whether WHAT is matched in LIST, each element of LIST being a
   pattern to match WHAT against, using backward matching (see
   match_backwards() in utils.c).

   LIST is a NULL-terminated array of suffix strings and WHAT is the
   string to test; neither may be NULL.  An element matches when it is
   a case-insensitive suffix of WHAT.  No label boundary is required,
   so "example.com" also matches "badexample.com".

   If an element of LIST matched, 1 is returned, 0 otherwise.  */
int
sufmatch (const char **list, const char *what)
{
  int i, j, k, lw;

  lw = (int) strlen (what);
  for (i = 0; list[i]; i++)
    {
      /* Walk both strings backwards, starting at their terminating
         NULs.  The casts to unsigned char keep tolower well-defined
         for bytes that are negative when char is signed.  */
      for (j = (int) strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
        if (tolower ((unsigned char) list[i][j])
            != tolower ((unsigned char) what[k]))
          break;
      /* The domain must be first to reach to beginning.  */
      if (j == -1)
        return 1;
    }
  return 0;
}
407 /* Return email address of the form username@FQDN suitable for
408 anonymous FTP passwords. This process is error-prone, and the
409 escape hatch is the MY_HOST preprocessor constant, which can be
410 used to hard-code either your hostname or FQDN at compile-time.
412 If the FQDN cannot be determined, a warning is printed, and the
413 function returns a short `username@' form, accepted by most
    anonymous FTP servers.
416 If not even the username can be divined, it means things are
417 seriously wrong, and Wget exits. */
/* NOTE(review): the result is kept in the static ADDRESS below, so the
   lookup work is meant to happen only on the first call.  USERID is a
   fixed 32-byte buffer handed to pwd_cuserid without a length --
   confirm that helper bounds its output.  gethostname is called with
   a literal 256; the size of HOST is not visible in this excerpt --
   confirm the two agree.  */
419 ftp_getaddress (void)
421 static char *address;
423 /* Do the drill only the first time, as it won't change. */
426 char userid[32]; /* 9 should be enough for Unix, but
427 I'd rather be on the safe side. */
430 if (!pwd_cuserid (userid))
432 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
437 STRDUP_ALLOCA (host, MY_HOST);
438 #else /* not MY_HOST */
442 if (uname (&ubuf) < 0)
444 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
445 exec_name, strerror (errno));
449 STRDUP_ALLOCA (host, ubuf.nodename);
451 #else /* not HAVE_UNAME */
452 #ifdef HAVE_GETHOSTNAME
454 if (gethostname (host, 256) < 0)
456 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
461 #else /* not HAVE_GETHOSTNAME */
462 #error Cannot determine host name.
463 #endif /* not HAVE_GETHOSTNAME */
464 #endif /* not HAVE_UNAME */
465 #endif /* not MY_HOST */
466 /* If the address we got so far contains a period, don't bother
468 if (strchr (host, '.'))
472 /* #### I've seen the following scheme fail on at least one
473 system! Do we care? */
475 /* According to Richard Stevens, the correct way to find the
476 FQDN is to (1) find the host name, (2) find its IP
477 address using gethostbyname(), and (3) get the FQDN using
478 gethostbyaddr(). So that's what we'll do. Step one has
481 struct hostent *hp = gethostbyname (host);
482 if (!hp || !hp->h_addr_list)
484 logprintf (LOG_ALWAYS, _("\
485 %s: Warning: cannot determine local IP address.\n"),
490 /* Copy the argument, so the call to gethostbyaddr doesn't
491 clobber it -- just in case. */
492 tmpstore = (char *)alloca (hp->h_length);
493 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
495 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
496 if (!hp || !hp->h_name)
498 logprintf (LOG_ALWAYS, _("\
499 %s: Warning: cannot reverse-lookup local IP address.\n"),
504 if (!strchr (hp->h_name, '.'))
507 /* This gets ticked pretty often. Karl Berry reports
508 that there can be valid reasons for the local host
509 name not to be an FQDN, so I've decided to remove the
511 logprintf (LOG_ALWAYS, _("\
512 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
518 /* Once we're here, hp->h_name contains the correct FQDN. */
519 STRDUP_ALLOCA (fqdn, hp->h_name);
/* Room for USERID, the '@' separator, FQDN and the terminating NUL --
   exactly what the sprintf below writes.  */
522 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
523 sprintf (address, "%s@%s", userid, fqdn);
528 /* Return a human-readable message for the host lookup error code ERROR. */
532 /* Can't use switch since some constants are equal (at least on my
533 system), and the compiler signals "duplicate case value". */
534 if (error == HOST_NOT_FOUND
535 || error == NO_RECOVERY
537 || error == NO_ADDRESS
538 || error == TRY_AGAIN)
539 return _("Host not found");
541 return _("Unknown error");
544 /* Clean the host list. This is a separate function, so we needn't
545 export HLIST and its implementation. Ha! */
549 struct host *l = hlist;
553 struct host *p = l->next;