1 /* Dealing with host names.
2 Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #include <sys/types.h>
36 # include <sys/socket.h>
37 # include <netinet/in.h>
38 # include <arpa/inet.h>
42 #ifdef HAVE_SYS_UTSNAME_H
43 # include <sys/utsname.h>
59 /* Host's symbolical name, as encountered at the time of first
60 inclusion, e.g. "fly.cc.fer.hr". */
62 /* Host's "real" name, i.e. its IP address, written out in ASCII
63 form of N.N.N.N, e.g. "161.53.70.130". */
65 /* More than one HOSTNAME can correspond to the same REALNAME. For
66 our purposes, the canonical name of the host is its HOSTNAME when
67 it was first encountered. This entry is said to have QUALITY. */
69 /* Next entry in the list. */
73 static struct host *hlist;
75 static struct host *add_hlist PARAMS ((struct host *, const char *,
78 /* The same as gethostbyname, but supports internet addresses of the
81 ngethostbyname (const char *name)
86 addr = (unsigned long)inet_addr (name);
88 hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET);
90 hp = gethostbyname (name);
94 /* Search for HOST in the linked list L, by hostname. Return the
95 entry, if found, or NULL. The search is case-insensitive. */
97 search_host (struct host *l, const char *host)
99 for (; l; l = l->next)
100 if (strcasecmp (l->hostname, host) == 0)
105 /* Like search_host, but searches by address. */
107 search_address (struct host *l, const char *address)
109 for (; l; l = l->next)
111 int cmp = strcmp (l->realname, address);
120 /* Store the address of HOSTNAME, internet-style, to WHERE. First
121 check for it in the host list, and (if not found), use
122 ngethostbyname to get it.
124 Return 1 on successful finding of the hostname, 0 otherwise. */
126 store_hostaddress (unsigned char *where, const char *hostname)
130 struct hostent *hptr;
134 /* If the address is of the form d.d.d.d, there will be no trouble
136 addr = (unsigned long)inet_addr (hostname);
139 /* If it is not of that form, try to find it in the cache. */
140 t = search_host (hlist, hostname);
142 addr = (unsigned long)inet_addr (t->realname);
144 /* If we have the numeric address, just store it. */
147 /* ADDR is in network byte order, meaning the code works on
148 little and big endian 32-bit architectures without change.
149 On big endian 64-bit architectures we need to be careful to
150 copy the correct four bytes. */
152 #ifdef WORDS_BIGENDIAN
153 offset = sizeof (unsigned long) - 4;
155 memcpy (where, (char *)&addr + offset, 4);
158 /* Since all else has failed, let's try gethostbyname(). Note that
159 we use gethostbyname() rather than ngethostbyname(), because we
160 *know* the address is not numerical. */
161 hptr = gethostbyname (hostname);
164 /* Copy the address of the host to socket description. */
165 memcpy (where, hptr->h_addr_list[0], hptr->h_length);
166 /* Now that we're here, we could as well cache the hostname for
167 future use, as in realhost(). First, we have to look for it by
168 address to know if it's already in the cache by another name. */
170 /* Originally, we copied to in.s_addr, but it appears to be missing
172 memcpy (&in, *hptr->h_addr_list, sizeof (in));
173 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
174 t = search_address (hlist, inet_s);
175 if (t) /* Found in the list, as realname. */
177 /* Set the default, 0 quality. */
178 hlist = add_hlist (hlist, hostname, inet_s, 0);
181 /* Since this is really the first time this host is encountered,
183 hlist = add_hlist (hlist, hostname, inet_s, 1);
187 /* Add a host to the host list. The list is sorted by addresses. For
188 equal addresses, the entries with quality should bubble towards the
189 beginning of the list. */
191 add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
193 struct host *t, *old, *beg;
195 /* The entry goes to the beginning of the list if the list is empty
196 or the order requires it. */
197 if (!l || (strcmp (nreal, l->realname) < 0))
199 t = (struct host *)xmalloc (sizeof (struct host));
200 t->hostname = xstrdup (nhost);
201 t->realname = xstrdup (nreal);
202 t->quality = quality;
208 /* Second to one-before-the-last element. */
214 cmp = strcmp (nreal, l->realname);
217 /* If the next list element is greater than s, put s between the
218 current and the next list element. */
219 t = (struct host *)xmalloc (sizeof (struct host));
222 t->hostname = xstrdup (nhost);
223 t->realname = xstrdup (nreal);
224 t->quality = quality;
227 t = (struct host *)xmalloc (sizeof (struct host));
228 t->hostname = xstrdup (nhost);
229 t->realname = xstrdup (nreal);
230 t->quality = quality;
231 /* Insert the new element after the last element. */
237 /* Determine the "real" name of HOST, as perceived by Wget. If HOST
238 is referenced by more than one name, "real" name is considered to
239 be the first one encountered in the past.
241 If the host cannot be found in the list of already dealt-with
242 hosts, try with its INET address. If this fails too, add it to the
243 list. The routine does not call gethostbyname twice for the same
244 host if it can possibly avoid it. */
246 realhost (const char *host)
248 struct host *l, *l_real;
250 struct hostent *hptr;
253 DEBUGP (("Checking for %s.\n", host));
254 /* Look for the host, looking by the host name. */
255 l = search_host (hlist, host);
256 if (l && l->quality) /* Found it with quality */
258 DEBUGP (("%s was already used, by that name.\n", host));
259 /* Here we return l->hostname, not host, because of the possible
260 case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
261 the same, but we want the one that was first). */
262 return xstrdup (l->hostname);
264 else if (!l) /* Not found, with or without quality */
266 /* The fact that gethostbyname will get called makes it
267 necessary to store it to the list, to ensure that
268 gethostbyname will not be called twice for the same string.
269 However, the quality argument must be set appropriately.
271 Note that add_hlist must be called *after* the realname
272 search, or the quality would be always set to 0 */
273 DEBUGP (("This is the first time I hear about host %s by that name.\n",
275 hptr = ngethostbyname (host);
277 return xstrdup (host);
278 /* Originally, we copied to in.s_addr, but it appears to be
279 missing on some systems. */
280 memcpy (&in, *hptr->h_addr_list, sizeof (in));
281 STRDUP_ALLOCA (inet_s, inet_ntoa (in));
283 else /* Found, without quality */
285 /* This case happens when host is on the list,
286 but not as first entry (the one with quality).
287 Then we just get its INET address and pick
288 up the first entry with quality. */
289 DEBUGP (("We've dealt with host %s, but under the name %s.\n",
291 STRDUP_ALLOCA (inet_s, l->realname);
294 /* Now we certainly have the INET address. The following loop is
295 guaranteed to pick either an entry with quality (because it is
296 the first one), or none at all. */
297 l_real = search_address (hlist, inet_s);
298 if (l_real) /* Found in the list, as realname. */
301 /* Set the default, 0 quality. */
302 hlist = add_hlist (hlist, host, inet_s, 0);
303 return xstrdup (l_real->hostname);
305 /* Since this is really the first time this host is encountered,
307 hlist = add_hlist (hlist, host, inet_s, 1);
308 return xstrdup (host);
311 /* Compare two hostnames (out of URL-s if the arguments are URL-s),
312 taking care of aliases. It uses realhost() to determine a unique
313 hostname for each of two hosts. If opt.simple_check is non-zero, only
314 strcasecmp() is used for comparison. */
316 same_host (const char *u1, const char *u2)
322 /* Skip protocol, if present. */
325 u1 += skip_proto (u1);
326 u2 += skip_proto (u2);
328 /* Skip username and password, if present. */
329 u1 += skip_uname (u1);
330 u2 += skip_uname (u2);
332 for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
333 p1 = strdupdelim (s, u1);
334 for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
335 p2 = strdupdelim (s, u2);
336 DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
337 if (strcasecmp (p1, p2) == 0)
341 DEBUGP (("They are quite alike.\n"));
344 else if (opt.simple_check)
348 DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
351 real1 = realhost (p1);
352 real2 = realhost (p2);
355 if (strcasecmp (real1, real2) == 0)
357 DEBUGP (("They are alike, after realhost()->%s.\n", real1));
364 DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
371 /* Determine whether a URL is acceptable to be followed, according to
372 a list of domains to accept. */
374 accept_domain (struct urlinfo *u)
376 assert (u->host != NULL);
379 if (!sufmatch ((const char **)opt.domains, u->host))
382 if (opt.exclude_domains)
384 if (sufmatch ((const char **)opt.exclude_domains, u->host))
390 /* Check whether WHAT is matched in LIST, each element of LIST being a
391 pattern to match WHAT against, using backward matching (see
392 match_backwards() in utils.c).
394 If an element of LIST matched, 1 is returned, 0 otherwise. */
396 sufmatch (const char **list, const char *what)
401 for (i = 0; list[i]; i++)
403 for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
404 if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
406 /* The domain must be the first to reach its beginning. */
413 /* Return email address of the form username@FQDN suitable for
414 anonymous FTP passwords. This process is error-prone, and the
415 escape hatch is the MY_HOST preprocessor constant, which can be
416 used to hard-code either your hostname or FQDN at compile-time.
418 If the FQDN cannot be determined, a warning is printed, and the
419 function returns a short `username@' form, accepted by most
422 If not even the username can be divined, it means things are
423 seriously fucked up, and Wget exits. */
425 ftp_getaddress (void)
427 static char *address;
429 /* Do the drill only the first time, as it won't change. */
432 char userid[32]; /* 9 should be enough for Unix, but
433 I'd rather be on the safe side. */
436 if (!pwd_cuserid (userid))
438 logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
443 STRDUP_ALLOCA (host, MY_HOST);
444 #else /* not MY_HOST */
448 if (uname (&ubuf) < 0)
450 logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
451 exec_name, strerror (errno));
455 STRDUP_ALLOCA (host, ubuf.nodename);
457 #else /* not HAVE_UNAME */
458 #ifdef HAVE_GETHOSTNAME
460 if (gethostname (host, 256) < 0)
462 logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
467 #else /* not HAVE_GETHOSTNAME */
468 #error Cannot determine host name.
469 #endif /* not HAVE_GETHOSTNAME */
470 #endif /* not HAVE_UNAME */
471 #endif /* not MY_HOST */
472 /* If the address we got so far contains a period, don't bother
474 if (strchr (host, '.'))
478 /* #### I've seen the following scheme fail on at least one
479 system! Do we care? */
481 /* According to Richard Stevens, the correct way to find the
482 FQDN is to (1) find the host name, (2) find its IP
483 address using gethostbyname(), and (3) get the FQDN using
484 gethostbyaddr(). So that's what we'll do. Step one has
487 struct hostent *hp = gethostbyname (host);
488 if (!hp || !hp->h_addr_list)
490 logprintf (LOG_ALWAYS, _("\
491 %s: Warning: cannot determine local IP address.\n"),
496 /* Copy the argument, so the call to gethostbyaddr doesn't
497 clobber it -- just in case. */
498 tmpstore = (char *)alloca (hp->h_length);
499 memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
501 hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
502 if (!hp || !hp->h_name)
504 logprintf (LOG_ALWAYS, _("\
505 %s: Warning: cannot reverse-lookup local IP address.\n"),
510 if (!strchr (hp->h_name, '.'))
513 /* This gets ticked pretty often. Karl Berry reports
514 that there can be valid reasons for the local host
515 name not to be an FQDN, so I've decided to remove the
517 logprintf (LOG_ALWAYS, _("\
518 %s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
524 /* Once we're here, hp->h_name contains the correct FQDN. */
525 STRDUP_ALLOCA (fqdn, hp->h_name);
528 address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
529 sprintf (address, "%s@%s", userid, fqdn);
534 /* Print error messages for host errors. */
538 /* Can't use switch since some constants are equal (at least on my
539 system), and the compiler signals "duplicate case value". */
540 if (error == HOST_NOT_FOUND
541 || error == NO_RECOVERY
543 || error == NO_ADDRESS
544 || error == TRY_AGAIN)
545 return _("Host not found");
547 return _("Unknown error");
550 /* Clean the host list. This is a separate function, so we needn't
551 export HLIST and its implementation. Ha! */
555 struct host *l = hlist;
559 struct host *p = l->next;