1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
38 # include <arpa/inet.h>
42 #ifdef HAVE_SYS_UTSNAME_H
43 # include <sys/utsname.h>
59 /* Host's symbolic name, as encountered at the time of first
60 inclusion, e.g. "fly.cc.fer.hr". */
62 /* Host's "real" name, i.e. its IP address, written out in ASCII
63 form of N.N.N.N, e.g. "161.53.70.130". */
65 /* More than one HOSTNAME can correspond to the same REALNAME. For
66 our purposes, the canonical name of the host is its HOSTNAME when
67 it was first encountered. This entry is said to have QUALITY. */
69 /* Next entry in the list. */
73 static struct host *hlist;
75 static struct host *add_hlist PARAMS ((struct host *, const char *,
78 /* The same as gethostbyname, but supports internet addresses of the
81 ngethostbyname (const char *name)
86 addr = (unsigned long)inet_addr (name);
88 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
90 hp = gethostbyname (name);
94 /* Search for HOST in the linked list L, by hostname. Return the
95 entry, if found, or NULL. The search is case-insensitive. */
97 search_host (struct host *l, const char *host)
99 for (; l; l = l->next)
100 if (strcasecmp (l->hostname, host) == 0)
105 /* Like search_host, but searches by address. */
107 search_address (struct host *l, const char *address)
109 for (; l; l = l->next)
111 int cmp = strcmp (l->realname, address);
120 /* Store the address of HOSTNAME, internet-style, to WHERE. First
121 check for it in the host list, and (if not found), use
122 ngethostbyname to get it.
124 Return 1 on successful finding of the hostname, 0 otherwise. */
126 store_hostaddress (unsigned char *where, const char *hostname)
130 struct hostent *hptr;
134 /* If the address is of the form d.d.d.d, there will be no trouble
136 addr = (unsigned long)inet_addr (hostname);
139 /* If it is not of that form, try to find it in the cache. */
140 t = search_host (hlist, hostname);
142 addr = (unsigned long)inet_addr (t->realname);
144 /* If we have the numeric address, just store it. */
147 /* This works on both little and big endian architecture, as
148 inet_addr returns the address in the proper order. */
149 #ifdef WORDS_BIGENDIAN
150 if (sizeof (addr) == 8)
152 /* We put the shift amount in a variable because it quiets gcc -Wall's
153 warning on 32-bit-address systems: "warning: left shift count >=
154 width of type". The optimizer should constant-fold away this
155 variable (you'd think the warning would come back with maximum
156 optimization turned on, but it doesn't, on gcc 2.8.1, at least).
157 Not sure if there's a cleaner way to get rid of the warning -- can
158 this code be surrounded by an #ifdef that's never active on 32-bit
159 systems? Is there no way to check at configure-time whether we'll
160 ever potentially encounter a 64-bit address? */
161 int shift_amount = 32;
163 addr <<= shift_amount;
166 memcpy (where, &addr, 4);
169 /* Since all else has failed, let's try gethostbyname(). Note that
170 we use gethostbyname() rather than ngethostbyname(), because we
171 *know* the address is not numerical. */
172 hptr = gethostbyname (hostname);
175 /* Copy the address of the host to socket description. */
176 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
177 /* Now that we're here, we could as well cache the hostname for
178 future use, as in realhost(). First, we have to look for it by
179 address to know if it's already in the cache by another name. */
181 /* Originally, we copied to in.s_addr, but it appears to be missing
183 memcpy (&in, *hptr->h_addr_list, sizeof (in));
184 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
185 t = search_address (hlist, inet_s);
186 if (t) /* Found in the list, as realname. */
188 /* Set the default, 0 quality. */
189 hlist = add_hlist (hlist, hostname, inet_s, 0);
192 /* Since this is really the first time this host is encountered,
194 hlist = add_hlist (hlist, hostname, inet_s, 1);
198 /* Add a host to the host list. The list is sorted by addresses. For
199 equal addresses, the entries with quality should bubble towards the
200 beginning of the list. */
202 add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
204 struct host *t, *old, *beg;
206 /* The entry goes to the beginning of the list if the list is empty
207 or the order requires it. */
208 if (!l || (strcmp (nreal, l->realname) < 0))
210 t = (struct host *)xmalloc (sizeof (struct host));
211 t->hostname = xstrdup (nhost);
212 t->realname = xstrdup (nreal);
213 t->quality = quality;
219 /* Second to one-before-the-last element. */
225 cmp = strcmp (nreal, l->realname);
228 /* If the next list element is greater than the new entry, put the
229 new entry between the current and the next list element. */
230 t = (struct host *)xmalloc (sizeof (struct host));
233 t->hostname = xstrdup (nhost);
234 t->realname = xstrdup (nreal);
235 t->quality = quality;
238 t = (struct host *)xmalloc (sizeof (struct host));
239 t->hostname = xstrdup (nhost);
240 t->realname = xstrdup (nreal);
241 t->quality = quality;
242 /* Insert the new element after the last element. */
248 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
249 is referenced by more than one name, "real" name is considered to
250 be the first one encountered in the past.
252 If the host cannot be found in the list of already dealt-with
253 hosts, try with its INET address. If this fails too, add it to the
254 list. The routine does not call gethostbyname twice for the same
255 host if it can possibly avoid it. */
257 realhost (const char *host)
261 struct hostent *hptr;
264 DEBUGP (("Checking for %s.\n", host));
265 /* Look for the host, looking by the host name. */
266 l = search_host (hlist, host);
267 if (l && l->quality) /* Found it with quality */
269 DEBUGP (("%s was already used, by that name.\n", host));
270 /* Here we return l->hostname, not host, because of the possible
271 case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
272 the same, but we want the one that was first). */
273 return xstrdup (l->hostname);
275 else if (!l) /* Not found, with or without quality */
277 /* The fact that gethostbyname will get called makes it
278 necessary to store it to the list, to ensure that
279 gethostbyname will not be called twice for the same string.
280 However, the quality argument must be set appropriately.
282 Note that add_hlist must be called *after* the realname
283 search, or the quality would be always set to 0 */
284 DEBUGP (("This is the first time I hear about host %s by that name.\n",
286 hptr = ngethostbyname (host);
288 return xstrdup (host);
289 /* Originally, we copied to in.s_addr, but it appears to be
290 missing on some systems. */
291 memcpy (&in, *hptr->h_addr_list, sizeof (in));
292 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
294 else /* Found, without quality */
296 /* This case happens when host is on the list,
297 but not as first entry (the one with quality).
298 Then we just get its INET address and pick
299 up the first entry with quality. */
300 DEBUGP (("We've dealt with host %s, but under the name %s.\n",
302 STRDUP_ALLOCA (inet_s, l->realname);
305 /* Now we certainly have the INET address. The following loop is
306 guaranteed to pick either an entry with quality (because it is
307 the first one), or none at all. */
308 l = search_address (hlist, inet_s);
309 if (l) /* Found in the list, as realname. */
311 /* Set the default, 0 quality. */
312 hlist = add_hlist (hlist, host, inet_s, 0);
313 return xstrdup (l->hostname);
315 /* Since this is really the first time this host is encountered,
317 hlist = add_hlist (hlist, host, inet_s, 1);
318 return xstrdup (host);
321 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
322 taking care of aliases. It uses realhost() to determine a unique
323 hostname for each of two hosts. If opt.simple_check is non-zero, only
324 a case-insensitive string comparison is used. */
326 same_host (const char *u1, const char *u2)
332 /* Skip protocol, if present. */
335 u1 += skip_proto (u1);
336 u2 += skip_proto (u2);
338 /* Skip username and password, if present. */
339 u1 += skip_uname (u1);
340 u2 += skip_uname (u2);
342 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
343 p1 = strdupdelim (s, u1);
344 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
345 p2 = strdupdelim (s, u2);
346 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
347 if (strcasecmp (p1, p2) == 0)
351 DEBUGP (("They are quite alike.\n"));
354 else if (opt.simple_check)
358 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
361 real1 = realhost (p1);
362 real2 = realhost (p2);
365 if (strcasecmp (real1, real2) == 0)
367 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
374 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
381 /* Determine whether a URL is acceptable to be followed, according to
382 a list of domains to accept. */
384 accept_domain (struct urlinfo *u)
386 assert (u->host != NULL);
389 if (!sufmatch ((const char **)opt.domains, u->host))
392 if (opt.exclude_domains)
394 if (sufmatch ((const char **)opt.exclude_domains, u->host))
400 /* Check whether WHAT is matched in LIST, each element of LIST being a
401 pattern to match WHAT against, using backward matching (see
402 match_backwards() in utils.c).
404 If an element of LIST matched, 1 is returned, 0 otherwise. */
406 sufmatch (const char **list, const char *what)
411 for (i = 0; list[i]; i++)
413 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
414 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
416 /* The domain must be the first to reach the beginning. */
423 /* Return email address of the form username@FQDN suitable for
424 anonymous FTP passwords. This process is error-prone, and the
425 escape hatch is the MY_HOST preprocessor constant, which can be
426 used to hard-code either your hostname or FQDN at compile-time.
428 If the FQDN cannot be determined, a warning is printed, and the
429 function returns a short `username@' form, accepted by most
432 If even the username cannot be determined, something is seriously
433 wrong, and Wget exits. */
435 ftp_getaddress (void)
437 static char *address;
439 /* Do the drill only the first time, as it won't change. */
442 char userid[32]; /* 9 should be enough for Unix, but
443 I'd rather be on the safe side. */
446 if (!pwd_cuserid (userid))
448 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
453 STRDUP_ALLOCA (host, MY_HOST);
454 #else /* not MY_HOST */
458 if (uname (&ubuf) < 0)
460 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
461 exec_name, strerror (errno));
465 STRDUP_ALLOCA (host, ubuf.nodename);
467 #else /* not HAVE_UNAME */
468 #ifdef HAVE_GETHOSTNAME
470 if (gethostname (host, 256) < 0)
472 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
477 #else /* not HAVE_GETHOSTNAME */
478 #error Cannot determine host name.
479 #endif /* not HAVE_GETHOSTNAME */
480 #endif /* not HAVE_UNAME */
481 #endif /* not MY_HOST */
482 /* If the address we got so far contains a period, don't bother
484 if (strchr (host, '.'))
488 /* #### I've seen the following scheme fail on at least one
489 system! Do we care? */
491 /* According to Richard Stevens, the correct way to find the
492 FQDN is to (1) find the host name, (2) find its IP
493 address using gethostbyname(), and (3) get the FQDN using
494 gethostbyaddr(). So that's what we'll do. Step one has
497 struct hostent *hp = gethostbyname (host);
498 if (!hp || !hp->h_addr_list)
500 logprintf (LOG_ALWAYS, _("\
501 %s: Warning: cannot determine local IP address.\n"),
506 /* Copy the argument, so the call to gethostbyaddr doesn't
507 clobber it -- just in case. */
508 tmpstore = (char *)alloca (hp->h_length);
509 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
511 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
512 if (!hp || !hp->h_name)
514 logprintf (LOG_ALWAYS, _("\
515 %s: Warning: cannot reverse-lookup local IP address.\n"),
520 if (!strchr (hp->h_name, '.'))
523 /* This gets ticked pretty often. Karl Berry reports
524 that there can be valid reasons for the local host
525 name not to be an FQDN, so I've decided to remove the
527 logprintf (LOG_ALWAYS, _("\
528 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
534 /* Once we're here, hp->h_name contains the correct FQDN. */
535 STRDUP_ALLOCA (fqdn, hp->h_name);
538 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
539 sprintf (address, "%s@%s", userid, fqdn);
544 /* Print error messages for host errors. */
548 /* Can't use switch since some constants are equal (at least on my
549 system), and the compiler signals "duplicate case value". */
550 if (error == HOST_NOT_FOUND
551 || error == NO_RECOVERY
553 || error == NO_ADDRESS
554 || error == TRY_AGAIN)
555 return _("Host not found");
557 return _("Unknown error");
560 /* Clean the host list. This is a separate function, so we needn't
561 export HLIST and its implementation. Ha! */
565 struct host *l = hlist;
569 struct host *p = l->next;