sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 2003 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9 (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #ifdef HAVE_STRING_H
  35 # include <string.h>
  36 #else  /* not HAVE_STRING_H */
  37 # include <strings.h>
  38 #endif /* not HAVE_STRING_H */
  39 #include <sys/types.h>
  40 #ifdef HAVE_UNISTD_H
  41 # include <unistd.h>
  42 #endif
  43 #ifdef HAVE_MMAP
  44 # include <sys/mman.h>
  45 #endif
  46 #ifdef HAVE_PWD_H
  47 # include <pwd.h>
  48 #endif
  49 #ifdef HAVE_LIMITS_H
  50 # include <limits.h>
  51 #endif
  52 #ifdef HAVE_UTIME_H
  53 # include <utime.h>
  54 #endif
  55 #ifdef HAVE_SYS_UTIME_H
  56 # include <sys/utime.h>
  57 #endif
  58 #include <errno.h>
  59 #ifdef NeXT
  60 # include <libc.h>              /* for access() */
  61 #endif
  62 #include <fcntl.h>
  63 #include <assert.h>
  64
  65 /* For TIOCGWINSZ and friends: */
  66 #ifdef HAVE_SYS_IOCTL_H
  67 # include <sys/ioctl.h>
  68 #endif
  69 #ifdef HAVE_TERMIOS_H
  70 # include <termios.h>
  71 #endif
  72
  73 /* Needed for run_with_timeout. */
  74 #undef USE_SIGNAL_TIMEOUT
  75 #ifdef HAVE_SIGNAL_H
  76 # include <signal.h>
  77 #endif
  78 #ifdef HAVE_SETJMP_H
  79 # include <setjmp.h>
  80 #endif
  81
  82 #ifndef HAVE_SIGSETJMP
  83 /* If sigsetjmp is a macro, configure won't pick it up. */
  84 # ifdef sigsetjmp
  85 #  define HAVE_SIGSETJMP
  86 # endif
  87 #endif
  88
  89 #ifdef HAVE_SIGNAL
  90 # ifdef HAVE_SIGSETJMP
  91 #  define USE_SIGNAL_TIMEOUT
  92 # endif
  93 # ifdef HAVE_SIGBLOCK
  94 #  define USE_SIGNAL_TIMEOUT
  95 # endif
  96 #endif
  97
  98 #include "wget.h"
  99 #include "utils.h"
 100 #include "hash.h"
 101
 102 #ifndef errno
 103 extern int errno;
 104 #endif
 105
 106 /* Utility function: like xstrdup(), but also lowercases S.  */
 107
 108 char *
 109 xstrdup_lower (const char *s)
 110 {
 111   char *copy = xstrdup (s);
 112   char *p = copy;
 113   for (; *p; p++)
 114     *p = TOLOWER (*p);
 115   return copy;
 116 }
 117
 118 /* Return a count of how many times CHR occurs in STRING. */
 119
 120 int
 121 count_char (const char *string, char chr)
 122 {
 123   const char *p;
 124   int count = 0;
 125   for (p = string; *p; p++)
 126     if (*p == chr)
 127       ++count;
 128   return count;
 129 }
 130
 131 /* Copy the string formed by two pointers (one on the beginning, other
 132    on the char after the last char) to a new, malloc-ed location.
 133    0-terminate it.  */
 134 char *
 135 strdupdelim (const char *beg, const char *end)
 136 {
 137   char *res = (char *)xmalloc (end - beg + 1);
 138   memcpy (res, beg, end - beg);
 139   res[end - beg] = '\0';
 140   return res;
 141 }
 142
 143 /* Parse a string containing comma-separated elements, and return a
 144    vector of char pointers with the elements.  Spaces following the
 145    commas are ignored.  */
 146 char **
 147 sepstring (const char *s)
 148 {
 149   char **res;
 150   const char *p;
 151   int i = 0;
 152
 153   if (!s || !*s)
 154     return NULL;
 155   res = NULL;
 156   p = s;
 157   while (*s)
 158     {
 159       if (*s == ',')
 160         {
 161           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 162           res[i] = strdupdelim (p, s);
 163           res[++i] = NULL;
 164           ++s;
 165           /* Skip the blanks following the ','.  */
 166           while (ISSPACE (*s))
 167             ++s;
 168           p = s;
 169         }
 170       else
 171         ++s;
 172     }
 173   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 174   res[i] = strdupdelim (p, s);
 175   res[i + 1] = NULL;
 176   return res;
 177 }
 178 \f
 179 /* Return pointer to a static char[] buffer in which zero-terminated
 180    string-representation of TM (in form hh:mm:ss) is printed.
 181
 182    If TM is NULL, the current time will be used.  */
 183
 184 char *
 185 time_str (time_t *tm)
 186 {
 187   static char output[15];
 188   struct tm *ptm;
 189   time_t secs = tm ? *tm : time (NULL);
 190
 191   if (secs == -1)
 192     {
 193       /* In case of error, return the empty string.  Maybe we should
 194          just abort if this happens?  */
 195       *output = '\0';
 196       return output;
 197     }
 198   ptm = localtime (&secs);
 199   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 200   return output;
 201 }
 202
 203 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 204
 205 char *
 206 datetime_str (time_t *tm)
 207 {
 208   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 209   struct tm *ptm;
 210   time_t secs = tm ? *tm : time (NULL);
 211
 212   if (secs == -1)
 213     {
 214       /* In case of error, return the empty string.  Maybe we should
 215          just abort if this happens?  */
 216       *output = '\0';
 217       return output;
 218     }
 219   ptm = localtime (&secs);
 220   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 221            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 222            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 223   return output;
 224 }
 225 \f
 226 /* The Windows versions of the following two functions are defined in
 227    mswindows.c.  */
 228
 229 #ifndef WINDOWS
 230 void
 231 fork_to_background (void)
 232 {
 233   pid_t pid;
 234   /* Whether we arrange our own version of opt.lfilename here.  */
 235   int changedp = 0;
 236
 237   if (!opt.lfilename)
 238     {
 239       opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
 240       changedp = 1;
 241     }
 242   pid = fork ();
 243   if (pid < 0)
 244     {
 245       /* parent, error */
 246       perror ("fork");
 247       exit (1);
 248     }
 249   else if (pid != 0)
 250     {
 251       /* parent, no error */
 252       printf (_("Continuing in background, pid %d.\n"), (int)pid);
 253       if (changedp)
 254         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 255       exit (0);                 /* #### should we use _exit()? */
 256     }
 257
 258   /* child: give up the privileges and keep running. */
 259   setsid ();
 260   freopen ("/dev/null", "r", stdin);
 261   freopen ("/dev/null", "w", stdout);
 262   freopen ("/dev/null", "w", stderr);
 263 }
 264 #endif /* not WINDOWS */
 265 \f
 266 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 267    specified with TM.  */
 268 void
 269 touch (const char *file, time_t tm)
 270 {
 271 #ifdef HAVE_STRUCT_UTIMBUF
 272   struct utimbuf times;
 273   times.actime = times.modtime = tm;
 274 #else
 275   time_t times[2];
 276   times[0] = times[1] = tm;
 277 #endif
 278
 279   if (utime (file, &times) == -1)
 280     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 281 }
 282
 283 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 284    nothing under MS-Windows.  */
 285 int
 286 remove_link (const char *file)
 287 {
 288   int err = 0;
 289   struct stat st;
 290
 291   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 292     {
 293       DEBUGP (("Unlinking %s (symlink).\n", file));
 294       err = unlink (file);
 295       if (err != 0)
 296         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 297                    file, strerror (errno));
 298     }
 299   return err;
 300 }
 301
 302 /* Does FILENAME exist?  This is quite a lousy implementation, since
 303    it supplies no error codes -- only a yes-or-no answer.  Thus it
 304    will return that a file does not exist if, e.g., the directory is
 305    unreadable.  I don't mind it too much currently, though.  The
 306    proper way should, of course, be to have a third, error state,
 307    other than true/false, but that would introduce uncalled-for
 308    additional complexity to the callers.  */
 309 int
 310 file_exists_p (const char *filename)
 311 {
 312 #ifdef HAVE_ACCESS
 313   return access (filename, F_OK) >= 0;
 314 #else
 315   struct stat buf;
 316   return stat (filename, &buf) >= 0;
 317 #endif
 318 }
 319
 320 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 321    Returns 0 on error.  */
 322 int
 323 file_non_directory_p (const char *path)
 324 {
 325   struct stat buf;
 326   /* Use lstat() rather than stat() so that symbolic links pointing to
 327      directories can be identified correctly.  */
 328   if (lstat (path, &buf) != 0)
 329     return 0;
 330   return S_ISDIR (buf.st_mode) ? 0 : 1;
 331 }
 332
 333 /* Return the size of file named by FILENAME, or -1 if it cannot be
 334    opened or seeked into. */
 335 long
 336 file_size (const char *filename)
 337 {
 338   long size;
 339   /* We use fseek rather than stat to determine the file size because
 340      that way we can also verify whether the file is readable.
 341      Inspired by the POST patch by Arnaud Wylie.  */
 342   FILE *fp = fopen (filename, "rb");
 343   if (!fp)
 344     return -1;
 345   fseek (fp, 0, SEEK_END);
 346   size = ftell (fp);
 347   fclose (fp);
 348   return size;
 349 }
 350
 351 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 352    doesn't exist is found.  Return a freshly allocated copy of the
 353    unused file name.  */
 354
 355 static char *
 356 unique_name_1 (const char *prefix)
 357 {
 358   int count = 1;
 359   int plen = strlen (prefix);
 360   char *template = (char *)alloca (plen + 1 + 24);
 361   char *template_tail = template + plen;
 362
 363   memcpy (template, prefix, plen);
 364   *template_tail++ = '.';
 365
 366   do
 367     number_to_string (template_tail, count++);
 368   while (file_exists_p (template));
 369
 370   return xstrdup (template);
 371 }
 372
 373 /* Return a unique file name, based on FILE.
 374
 375    More precisely, if FILE doesn't exist, it is returned unmodified.
 376    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 377    file name that doesn't exist is returned.
 378
 379    The resulting file is not created, only verified that it didn't
 380    exist at the point in time when the function was called.
 381    Therefore, where security matters, don't rely that the file created
 382    by this function exists until you open it with O_EXCL or
 383    something.
 384
 385    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 386    string.  Otherwise, it may return FILE if the file doesn't exist
 387    (and therefore doesn't need changing).  */
 388
 389 char *
 390 unique_name (const char *file, int allow_passthrough)
 391 {
 392   /* If the FILE itself doesn't exist, return it without
 393      modification. */
 394   if (!file_exists_p (file))
 395     return allow_passthrough ? (char *)file : xstrdup (file);
 396
 397   /* Otherwise, find a numeric suffix that results in unused file name
 398      and return it.  */
 399   return unique_name_1 (file);
 400 }
 401 \f
 402 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 403    are missing, create them first.  In case any mkdir() call fails,
 404    return its error status.  Returns 0 on successful completion.
 405
 406    The behaviour of this function should be identical to the behaviour
 407    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 408 int
 409 make_directory (const char *directory)
 410 {
 411   int quit = 0;
 412   int i;
 413   int ret = 0;
 414   char *dir;
 415
 416   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 417      function is unsafe if called with a read-only char *argument.  */
 418   STRDUP_ALLOCA (dir, directory);
 419
 420   /* If the first character of dir is '/', skip it (and thus enable
 421      creation of absolute-pathname directories.  */
 422   for (i = (*dir == '/'); 1; ++i)
 423     {
 424       for (; dir[i] && dir[i] != '/'; i++)
 425         ;
 426       if (!dir[i])
 427         quit = 1;
 428       dir[i] = '\0';
 429       /* Check whether the directory already exists.  Allow creation of
 430          of intermediate directories to fail, as the initial path components
 431          are not necessarily directories!  */
 432       if (!file_exists_p (dir))
 433         ret = mkdir (dir, 0777);
 434       else
 435         ret = 0;
 436       if (quit)
 437         break;
 438       else
 439         dir[i] = '/';
 440     }
 441   return ret;
 442 }
 443
 444 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 445    should be a file name.
 446
 447    file_merge("/foo/bar", "baz")  => "/foo/baz"
 448    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 449    file_merge("foo", "bar")       => "bar"
 450
 451    In other words, it's a simpler and gentler version of uri_merge_1.  */
 452
 453 char *
 454 file_merge (const char *base, const char *file)
 455 {
 456   char *result;
 457   const char *cut = (const char *)strrchr (base, '/');
 458
 459   if (!cut)
 460     return xstrdup (file);
 461
 462   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 463   memcpy (result, base, cut - base);
 464   result[cut - base] = '/';
 465   strcpy (result + (cut - base) + 1, file);
 466
 467   return result;
 468 }
 469 \f
 470 static int in_acclist PARAMS ((const char *const *, const char *, int));
 471
 472 /* Determine whether a file is acceptable to be followed, according to
 473    lists of patterns to accept/reject.  */
 474 int
 475 acceptable (const char *s)
 476 {
 477   int l = strlen (s);
 478
 479   while (l && s[l] != '/')
 480     --l;
 481   if (s[l] == '/')
 482     s += (l + 1);
 483   if (opt.accepts)
 484     {
 485       if (opt.rejects)
 486         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 487                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 488       else
 489         return in_acclist ((const char *const *)opt.accepts, s, 1);
 490     }
 491   else if (opt.rejects)
 492     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 493   return 1;
 494 }
 495
 496 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 497    `/something', frontcmp() will return 1 only if S2 begins with
 498    `/something'.  Otherwise, 0 is returned.  */
 499 int
 500 frontcmp (const char *s1, const char *s2)
 501 {
 502   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 503   return !*s1;
 504 }
 505
 506 /* Iterate through STRLIST, and return the first element that matches
 507    S, through wildcards or front comparison (as appropriate).  */
 508 static char *
 509 proclist (char **strlist, const char *s, enum accd flags)
 510 {
 511   char **x;
 512
 513   for (x = strlist; *x; x++)
 514     if (has_wildcards_p (*x))
 515       {
 516         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 517           break;
 518       }
 519     else
 520       {
 521         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 522         if (frontcmp (p, s))
 523           break;
 524       }
 525   return *x;
 526 }
 527
 528 /* Returns whether DIRECTORY is acceptable for download, wrt the
 529    include/exclude lists.
 530
 531    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 532    and absolute paths may be freely intermixed.  */
 533 int
 534 accdir (const char *directory, enum accd flags)
 535 {
 536   /* Remove starting '/'.  */
 537   if (flags & ALLABS && *directory == '/')
 538     ++directory;
 539   if (opt.includes)
 540     {
 541       if (!proclist (opt.includes, directory, flags))
 542         return 0;
 543     }
 544   if (opt.excludes)
 545     {
 546       if (proclist (opt.excludes, directory, flags))
 547         return 0;
 548     }
 549   return 1;
 550 }
 551
 552 /* Return non-zero if STRING ends with TAIL.  For instance:
 553
 554    match_tail ("abc", "bc", 0)  -> 1
 555    match_tail ("abc", "ab", 0)  -> 0
 556    match_tail ("abc", "abc", 0) -> 1
 557
 558    If FOLD_CASE_P is non-zero, the comparison will be
 559    case-insensitive.  */
 560
 561 int
 562 match_tail (const char *string, const char *tail, int fold_case_p)
 563 {
 564   int i, j;
 565
 566   /* We want this to be fast, so we code two loops, one with
 567      case-folding, one without. */
 568
 569   if (!fold_case_p)
 570     {
 571       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 572         if (string[i] != tail[j])
 573           break;
 574     }
 575   else
 576     {
 577       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 578         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 579           break;
 580     }
 581
 582   /* If the tail was exhausted, the match was succesful.  */
 583   if (j == -1)
 584     return 1;
 585   else
 586     return 0;
 587 }
 588
 589 /* Checks whether string S matches each element of ACCEPTS.  A list
 590    element are matched either with fnmatch() or match_tail(),
 591    according to whether the element contains wildcards or not.
 592
 593    If the BACKWARD is 0, don't do backward comparison -- just compare
 594    them normally.  */
 595 static int
 596 in_acclist (const char *const *accepts, const char *s, int backward)
 597 {
 598   for (; *accepts; accepts++)
 599     {
 600       if (has_wildcards_p (*accepts))
 601         {
 602           /* fnmatch returns 0 if the pattern *does* match the
 603              string.  */
 604           if (fnmatch (*accepts, s, 0) == 0)
 605             return 1;
 606         }
 607       else
 608         {
 609           if (backward)
 610             {
 611               if (match_tail (s, *accepts, 0))
 612                 return 1;
 613             }
 614           else
 615             {
 616               if (!strcmp (s, *accepts))
 617                 return 1;
 618             }
 619         }
 620     }
 621   return 0;
 622 }
 623
 624 /* Return the location of STR's suffix (file extension).  Examples:
 625    suffix ("foo.bar")       -> "bar"
 626    suffix ("foo.bar.baz")   -> "baz"
 627    suffix ("/foo/bar")      -> NULL
 628    suffix ("/foo.bar/baz")  -> NULL  */
 629 char *
 630 suffix (const char *str)
 631 {
 632   int i;
 633
 634   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 635     ;
 636
 637   if (str[i++] == '.')
 638     return (char *)str + i;
 639   else
 640     return NULL;
 641 }
 642
 643 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
 644    `]').  */
 645
 646 int
 647 has_wildcards_p (const char *s)
 648 {
 649   for (; *s; s++)
 650     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 651       return 1;
 652   return 0;
 653 }
 654
 655 /* Return non-zero if FNAME ends with a typical HTML suffix.  The
 656    following (case-insensitive) suffixes are presumed to be HTML files:
 657
 658      html
 659      htm
 660      ?html (`?' matches one character)
 661
 662    #### CAVEAT.  This is not necessarily a good indication that FNAME
 663    refers to a file that contains HTML!  */
 664 int
 665 has_html_suffix_p (const char *fname)
 666 {
 667   char *suf;
 668
 669   if ((suf = suffix (fname)) == NULL)
 670     return 0;
 671   if (!strcasecmp (suf, "html"))
 672     return 1;
 673   if (!strcasecmp (suf, "htm"))
 674     return 1;
 675   if (suf[0] && !strcasecmp (suf + 1, "html"))
 676     return 1;
 677   return 0;
 678 }
 679
 680 /* Read a line from FP and return the pointer to freshly allocated
 681    storage.  The storage space is obtained through malloc() and should
 682    be freed with free() when it is no longer needed.
 683
 684    The length of the line is not limited, except by available memory.
 685    The newline character at the end of line is retained.  The line is
 686    terminated with a zero character.
 687
 688    After end-of-file is encountered without anything being read, NULL
 689    is returned.  NULL is also returned on error.  To distinguish
 690    between these two cases, use the stdio function ferror().  */
 691
 692 char *
 693 read_whole_line (FILE *fp)
 694 {
 695   int length = 0;
 696   int bufsize = 82;
 697   char *line = (char *)xmalloc (bufsize);
 698
 699   while (fgets (line + length, bufsize - length, fp))
 700     {
 701       length += strlen (line + length);
 702       if (length == 0)
 703         /* Possible for example when reading from a binary file where
 704            a line begins with \0.  */
 705         continue;
 706
 707       if (line[length - 1] == '\n')
 708         break;
 709
 710       /* fgets() guarantees to read the whole line, or to use up the
 711          space we've given it.  We can double the buffer
 712          unconditionally.  */
 713       bufsize <<= 1;
 714       line = xrealloc (line, bufsize);
 715     }
 716   if (length == 0 || ferror (fp))
 717     {
 718       xfree (line);
 719       return NULL;
 720     }
 721   if (length + 1 < bufsize)
 722     /* Relieve the memory from our exponential greediness.  We say
 723        `length + 1' because the terminating \0 is not included in
 724        LENGTH.  We don't need to zero-terminate the string ourselves,
 725        though, because fgets() does that.  */
 726     line = xrealloc (line, length + 1);
 727   return line;
 728 }
 729 \f
/* Read FILE into memory and return a pointer to a newly allocated
   `struct file_memory' describing it, or NULL on failure.

   Use the struct element `content' to access the file contents and
   the element `length' to know the file length.  `content' is *not*
   zero-terminated, and you should *not* read or write beyond the
   [0, length) range of characters.

   After you are done with the file contents, call read_file_free to
   release the memory.

   When HAVE_MMAP is defined, the file is first mmap'ed; if fstat()
   or mmap() fails, or when HAVE_MMAP is not defined, the contents
   are read into a malloc'ed buffer using read().  The element
   `mmap_p' records which method was used.

   NULL is returned if FILE cannot be opened or if read() reports an
   error.

   If FILE is named "-", fileno(stdin) is used for reading instead
   (and is not closed afterwards).  If you want to read from a real
   file named "-", use "./-" instead.  */

struct file_memory *
read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD is stdin's descriptor, which must be left open.  */
  int inhibit_close = 0;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = 1;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xnew (struct file_memory);

#ifdef HAVE_MMAP
  {
    struct stat buf;
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    /* The descriptor is not needed any more once the mapping has
       been established.  */
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  FD is still open
     at this point and FM is still allocated.  */
#endif /* HAVE_MMAP */

  fm->length = 0;
  size = 512;                   /* number of bytes fm->content can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      long nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Append to what has been read so far, using all the room that
         is left in the buffer.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  An empty
       file keeps its initial buffer rather than being shrunk to zero
       bytes.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* A read error: release everything acquired above and report the
     failure.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}
 850
 851 /* Release the resources held by FM.  Specifically, this calls
 852    munmap() or xfree() on fm->content, depending whether mmap or
 853    malloc/read were used to read in the file.  It also frees the
 854    memory needed to hold the FM structure itself.  */
 855
 856 void
 857 read_file_free (struct file_memory *fm)
 858 {
 859 #ifdef HAVE_MMAP
 860   if (fm->mmap_p)
 861     {
 862       munmap (fm->content, fm->length);
 863     }
 864   else
 865 #endif
 866     {
 867       xfree (fm->content);
 868     }
 869   xfree (fm);
 870 }
 871 \f
 872 /* Free the pointers in a NULL-terminated vector of pointers, then
 873    free the pointer itself.  */
 874 void
 875 free_vec (char **vec)
 876 {
 877   if (vec)
 878     {
 879       char **p = vec;
 880       while (*p)
 881         xfree (*p++);
 882       xfree (vec);
 883     }
 884 }
 885
 886 /* Append vector V2 to vector V1.  The function frees V2 and
 887    reallocates V1 (thus you may not use the contents of neither
 888    pointer after the call).  If V1 is NULL, V2 is returned.  */
 889 char **
 890 merge_vecs (char **v1, char **v2)
 891 {
 892   int i, j;
 893
 894   if (!v1)
 895     return v2;
 896   if (!v2)
 897     return v1;
 898   if (!*v2)
 899     {
 900       /* To avoid j == 0 */
 901       xfree (v2);
 902       return v1;
 903     }
 904   /* Count v1.  */
 905   for (i = 0; v1[i]; i++);
 906   /* Count v2.  */
 907   for (j = 0; v2[j]; j++);
 908   /* Reallocate v1.  */
 909   v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
 910   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
 911   xfree (v2);
 912   return v1;
 913 }
 914
 915 /* A set of simple-minded routines to store strings in a linked list.
 916    This used to also be used for searching, but now we have hash
 917    tables for that.  */
 918
 919 /* It's a shame that these simple things like linked lists and hash
 920    tables (see hash.c) need to be implemented over and over again.  It
 921    would be nice to be able to use the routines from glib -- see
 922    www.gtk.org for details.  However, that would make Wget depend on
 923    glib, and I want to avoid dependencies to external libraries for
 924    reasons of convenience and portability (I suspect Wget is more
 925    portable than anything ever written for Gnome).  */
 926
 927 /* Append an element to the list.  If the list has a huge number of
 928    elements, this can get slow because it has to find the list's
 929    ending.  If you think you have to call slist_append in a loop,
 930    think about calling slist_prepend() followed by slist_nreverse().  */
 931
 932 slist *
 933 slist_append (slist *l, const char *s)
 934 {
 935   slist *newel = xnew (slist);
 936   slist *beg = l;
 937
 938   newel->string = xstrdup (s);
 939   newel->next = NULL;
 940
 941   if (!l)
 942     return newel;
 943   /* Find the last element.  */
 944   while (l->next)
 945     l = l->next;
 946   l->next = newel;
 947   return beg;
 948 }
 949
 950 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
 951
 952 slist *
 953 slist_prepend (slist *l, const char *s)
 954 {
 955   slist *newel = xnew (slist);
 956   newel->string = xstrdup (s);
 957   newel->next = l;
 958   return newel;
 959 }
 960
 961 /* Destructively reverse L. */
 962
 963 slist *
 964 slist_nreverse (slist *l)
 965 {
 966   slist *prev = NULL;
 967   while (l)
 968     {
 969       slist *next = l->next;
 970       l->next = prev;
 971       prev = l;
 972       l = next;
 973     }
 974   return prev;
 975 }
 976
 977 /* Is there a specific entry in the list?  */
 978 int
 979 slist_contains (slist *l, const char *s)
 980 {
 981   for (; l; l = l->next)
 982     if (!strcmp (l->string, s))
 983       return 1;
 984   return 0;
 985 }
 986
 987 /* Free the whole slist.  */
 988 void
 989 slist_free (slist *l)
 990 {
 991   while (l)
 992     {
 993       slist *n = l->next;
 994       xfree (l->string);
 995       xfree (l);
 996       l = n;
 997     }
 998 }
 999 \f
/* Sometimes it's useful to create "sets" of strings, i.e. special
   hash tables where strings are stored as keys and are merely queried
   for their existence.  The routines below make that transparent.  */

/* Add S to the string set HT.  If S is already present, nothing is
   done, which saves freeing the old key and duplicating a new one.
   Otherwise a copy of S is stored as the key.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* The value is always the literal "1": a clear, arbitrary marker
     that costs no memory, since the same literal is shared by all
     the key-value pairs.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1020
/* Membership test for string sets.  This simply forwards to
   hash_table_contains and returns whatever it reports for key S.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1028
/* Callback handed to hash_table_map by string_set_free: release KEY.
   The value and the extra argument are not looked at.  Always
   returns 0.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}
1035
1036 void
1037 string_set_free (struct hash_table *ht)
1038 {
1039   hash_table_map (ht, string_set_free_mapper, NULL);
1040   hash_table_destroy (ht);
1041 }
1042
/* Callback handed to hash_table_map by free_keys_and_values: release
   both KEY and VALUE.  The extra argument is not looked at.  Always
   returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (value);
  xfree (key);
  return 0;
}
1050
1051 /* Another utility function: call free() on all keys and values of HT.  */
1052
1053 void
1054 free_keys_and_values (struct hash_table *ht)
1055 {
1056   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1057 }
1058
1059 \f
/* Engine for legible and legible_large_int.  REPR is the decimal
   representation of a number, optionally starting with `-'.  The
   result is the same text with a `,' inserted between every group of
   three digits, counted from the right.

   The result lives in a static buffer that is overwritten by the next
   call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[48];
  char *out = outbuf;
  const char *digits = repr;
  int ndigits, pos;

  /* The sign is copied verbatim and takes no part in the grouping.  */
  if (*digits == '-')
    {
      *out++ = '-';
      ++digits;
    }

  ndigits = strlen (digits);
  for (pos = 0; pos < ndigits; pos++)
    {
      /* A separator is due whenever the number of digits still to be
         written is a multiple of three -- except at the very start,
         where it would lead the number.  */
      if (pos != 0 && (ndigits - pos) % 3 == 0)
        *out++ = ',';
      *out++ = digits[pos];
    }

  *out = '\0';
  return outbuf;
}
1099
/* Return L printed in decimal with thousand separators.  The string
   is produced by legible_1 and therefore sits in its static buffer,
   which the next call overwrites.  */

char *
legible (long l)
{
  char digits[24];

  /* Plain decimal first; legible_1 then adds the separators.  */
  number_to_string (digits, l);
  return legible_1 (digits);
}
1110
1111 /* Write a string representation of LARGE_INT NUMBER into the provided
1112    buffer.  The buffer should be able to accept 24 characters,
1113    including the terminating zero.
1114
1115    It would be dangerous to use sprintf, because the code wouldn't
1116    work on a machine with gcc-provided long long support, but without
1117    libc support for "%lld".  However, such platforms will typically
1118    not have snprintf and will use our version, which does support
1119    "%lld" where long longs are available.  */
1120
1121 static void
1122 large_int_to_string (char *buffer, LARGE_INT number)
1123 {
1124   snprintf (buffer, 24, LARGE_INT_FMT, number);
1125 }
1126
1127 /* The same as legible(), but works on LARGE_INT.  */
1128
1129 char *
1130 legible_large_int (LARGE_INT l)
1131 {
1132   char inbuf[48];
1133   large_int_to_string (inbuf, l);
1134   return legible_1 (inbuf);
1135 }
1136
/* Return the number of characters needed to print NUMBER in base 10:
   its decimal digits, plus one for the leading `-' when NUMBER is
   negative.  For example, numdigit (0) is 1 and numdigit (-25) is 3.  */
int
numdigit (long number)
{
  int cnt = 1;
  unsigned long magnitude;

  if (number < 0)
    {
      /* Negate in unsigned arithmetic.  `-number' would overflow for
         the most negative long, whose absolute value does not fit in
         a long.  */
      magnitude = 0UL - (unsigned long) number;
      ++cnt;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) number;

  while ((magnitude /= 10) > 0)
    ++cnt;
  return cnt;
}
1151
/* Fallback for machines whose headers do not define INT_MAX.  The
   value is an unsigned int with every bit set except the topmost one,
   converted to int.  CHAR_BIT is taken to be 8 where it is not
   defined either.  (This presumes int and unsigned have the same
   width and no padding bits.)  */
#ifndef INT_MAX
# ifndef CHAR_BIT
#  define CHAR_BIT 8
# endif
# define INT_MAX ((int) ~((unsigned) 1 << (CHAR_BIT * sizeof (int) - 1)))
#endif
1160
/* Print NUMBER to BUFFER in base 10.  The output is exactly what
   `sprintf (buffer, "%ld", number)' would produce, but it is generated
   with plain integer arithmetic instead of going through the printf
   machinery, which can make a difference in programs that frequently
   convert numbers to strings.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be big enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit longs 24 bytes are
   always sufficient: that covers the 19 digits of the largest 64-bit
   number, the `-' sign in case it's negative, and the terminating
   '\0'.

   The conversion works on the magnitude of NUMBER held in an unsigned
   long, so every value, including the most negative long, is handled
   by the same code and independently of the size of long.  */

char *
number_to_string (char *buffer, long number)
{
  char *p = buffer;
  char *end;
  unsigned long magnitude, rest;

  if (number < 0)
    {
      *p++ = '-';
      /* Negate in unsigned arithmetic; `-number' would overflow for
         the most negative long.  */
      magnitude = 0UL - (unsigned long) number;
    }
  else
    magnitude = (unsigned long) number;

  /* First pass: step P over as many positions as there are digits,
     so that the digits can be stored directly in their final place
     without an intermediate buffer.  */
  rest = magnitude;
  do
    ++p;
  while ((rest /= 10) != 0);

  end = p;
  *end = '\0';

  /* Second pass: produce the digits least significant first, filling
     the reserved positions from right to left.  */
  do
    *--p = (char) ('0' + magnitude % 10);
  while ((magnitude /= 10) != 0);

  return end;
}
1286 \f
1287 /* Support for timers. */
1288
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Pick the timer backend.  Exactly one of the three constants above
   ends up defined: TIMER_WINDOWS under Windows, TIMER_GETTIMEOFDAY on
   other systems that have gettimeofday() (virtually all modern Unix
   systems), and TIMER_TIME as the catch-all for non-Windows systems
   without gettimeofday.  wget_sys_time is the type in which the
   chosen backend stores a point in time.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
typedef ULARGE_INTEGER wget_sys_time;
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
typedef struct timeval wget_sys_time;
#else
# define TIMER_TIME
typedef time_t wget_sys_time;
#endif
1324
/* State of one timer.  Elapsed times are kept in milliseconds.  */
struct wget_timer {
  /* Non-zero once wtimer_reset has stored a start time.
     wtimer_update asserts this before using START.  */
  int initialized;

  /* The starting point in time which, subtracted from the current
     time, yields elapsed time.  wtimer_update moves it forward to the
     current time when it notices that the clock was set back.  */
  wget_sys_time start;

  /* The most recent elapsed time, as computed by wtimer_update and
     handed out by wtimer_read.  Measured in milliseconds.  */
  double elapsed_last;

  /* Approximately, the time elapsed between the true start of the
     measurement and the time represented by START.  Zero until START
     has had to be moved because of clock skew.  */
  double elapsed_pre_start;
};
1341
1342 /* Allocate a timer.  Calling wtimer_read on the timer will return
1343    zero.  It is not legal to call wtimer_update with a freshly
1344    allocated timer -- use wtimer_reset first.  */
1345
1346 struct wget_timer *
1347 wtimer_allocate (void)
1348 {
1349   struct wget_timer *wt = xnew (struct wget_timer);
1350   xzero (*wt);
1351   return wt;
1352 }
1353
/* Convenience constructor: allocate a timer with wtimer_allocate,
   start it with wtimer_reset, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();
  wtimer_reset (timer);
  return timer;
}
1363
/* Release the memory held by timer WT.  WT must not be used in any
   way after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1372
1373 /* Store system time to WST.  */
1374
1375 static void
1376 wtimer_sys_set (wget_sys_time *wst)
1377 {
1378 #ifdef TIMER_GETTIMEOFDAY
1379   gettimeofday (wst, NULL);
1380 #endif
1381
1382 #ifdef TIMER_TIME
1383   time (wst);
1384 #endif
1385
1386 #ifdef TIMER_WINDOWS
1387   /* We use GetSystemTime to get the elapsed time.  MSDN warns that
1388      system clock adjustments can skew the output of GetSystemTime
1389      when used as a timer and gives preference to GetTickCount and
1390      high-resolution timers.  But GetTickCount can overflow, and hires
1391      timers are typically used for profiling, not for regular time
1392      measurement.  Since we handle clock skew anyway, we just use
1393      GetSystemTime.  */
1394   FILETIME ft;
1395   SYSTEMTIME st;
1396   GetSystemTime (&st);
1397
1398   /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1399      FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1400      arithmetic on that.  */
1401   SystemTimeToFileTime (&st, &ft);
1402   wst->HighPart = ft.dwHighDateTime;
1403   wst->LowPart  = ft.dwLowDateTime;
1404 #endif
1405 }
1406
1407 /* Reset timer WT.  This establishes the starting point from which
1408    wtimer_elapsed() will return the number of elapsed milliseconds.
1409    It is allowed to reset a previously used timer.
1410
1411    If a non-zero value is used as START, the timer's values will be
1412    offset by START.  */
1413
1414 void
1415 wtimer_reset (struct wget_timer *wt)
1416 {
1417   /* Set the start time to the current time. */
1418   wtimer_sys_set (&wt->start);
1419   wt->elapsed_last = 0;
1420   wt->elapsed_pre_start = 0;
1421   wt->initialized = 1;
1422 }
1423
1424 static double
1425 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1426 {
1427 #ifdef TIMER_GETTIMEOFDAY
1428   return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1429           + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1430 #endif
1431
1432 #ifdef TIMER_TIME
1433   return 1000 * (*wst1 - *wst2);
1434 #endif
1435
1436 #ifdef WINDOWS
1437   /* VC++ 6 doesn't support direct cast of uint64 to double.  To work
1438      around this, we subtract, then convert to signed, then finally to
1439      double.  */
1440   return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1441 #endif
1442 }
1443
1444 /* Update the timer's elapsed interval.  This function causes the
1445    timer to call gettimeofday (or time(), etc.) to update its idea of
1446    current time.  To get the elapsed interval in milliseconds, use
1447    wtimer_read.
1448
1449    This function handles clock skew, i.e. time that moves backwards is
1450    ignored.  */
1451
1452 void
1453 wtimer_update (struct wget_timer *wt)
1454 {
1455   wget_sys_time now;
1456   double elapsed;
1457
1458   assert (wt->initialized != 0);
1459
1460   wtimer_sys_set (&now);
1461   elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1462
1463   /* Ideally we'd just return the difference between NOW and
1464      wt->start.  However, the system timer can be set back, and we
1465      could return a value smaller than when we were last called, even
1466      a negative value.  Both of these would confuse the callers, which
1467      expect us to return monotonically nondecreasing values.
1468
1469      Therefore: if ELAPSED is smaller than its previous known value,
1470      we reset wt->start to the current time and effectively start
1471      measuring from this point.  But since we don't want the elapsed
1472      value to start from zero, we set elapsed_pre_start to the last
1473      elapsed time and increment all future calculations by that
1474      amount.  */
1475
1476   if (elapsed < wt->elapsed_last)
1477     {
1478       wt->start = now;
1479       wt->elapsed_pre_start = wt->elapsed_last;
1480       elapsed = wt->elapsed_last;
1481     }
1482
1483   wt->elapsed_last = elapsed;
1484 }
1485
1486 /* Return the elapsed time in milliseconds between the last call to
1487    wtimer_reset and the last call to wtimer_update.
1488
1489    A typical use of the timer interface would be:
1490
1491        struct wtimer *timer = wtimer_new ();
1492        ... do something that takes a while ...
1493        wtimer_update ();
1494        double msecs = wtimer_read ();  */
1495
1496 double
1497 wtimer_read (const struct wget_timer *wt)
1498 {
1499   return wt->elapsed_last;
1500 }
1501
/* Return the assumed granularity of the selected timer backend, in
   milliseconds.  Code that wants to substitute a better value for
   timers that have returned zero uses this.  */

double
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* Granularity of gettimeofday varies wildly between architectures.
     However, it appears that on modern machines it tends to be better
     than 1ms.  Assume 100 usecs.  (Perhaps the configure process
     could actually measure this?)  */
  return 0.1;
#elif defined (TIMER_TIME)
  /* time() reports whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* According to MSDN, GetSystemTime returns a broken-down time
     structure the smallest member of which are milliseconds.  */
  return 1;
#endif
}
1527 \f
1528 /* This should probably be at a better place, but it doesn't really
1529    fit into html-parse.c.  */
1530
/* Return the entity that stands for C in quoted HTML, or NULL if C is
   copied through unchanged.  */
static const char *
html_entity_for (char c)
{
  switch (c)
    {
    case '&':
      return "&amp;";
    case '<':
      return "&lt;";
    case '>':
      return "&gt;";
    case '\"':
      return "&quot;";
    case ' ':
      return "&#32;";
    default:
      return NULL;
    }
}

/* Return a newly allocated (xmalloc) copy of S in which the
   characters special to HTML, as well as the space, are replaced by
   entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   Every other character is copied unchanged.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  const char *entity;
  char *res, *dst;
  int size = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    {
      entity = html_entity_for (*src);
      size += entity ? (int) strlen (entity) : 1;
    }

  res = (char *)xmalloc (size + 1);

  /* Second pass: write out entities and ordinary characters.  */
  dst = res;
  for (src = s; *src; src++)
    {
      entity = html_entity_for (*src);
      if (entity)
        {
          while (*entity)
            *dst++ = *entity++;
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';
  return res;
}
1602
/* Return the width, in columns, of the terminal that stderr is
   connected to.  Return 0 if the width cannot be determined: when the
   TIOCGWINSZ ioctl is not available or fails, and also whenever
   opt.lfilename is set.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* A failing ioctl most likely means ENOTTY.  */
  if (opt.lfilename == NULL
      && ioctl (fileno (stderr), TIOCGWINSZ, &wsz) >= 0)
    return wsz.ws_col;
#endif /* TIOCGWINSZ */

  return 0;
}
1627
/* Return a random number between 0 and MAX-1, inclusive.

   The result is derived from a single call to rand(), so it can take
   on at most RAND_MAX+1 distinct values; for a MAX greater than that,
   not every number in the range can come out.  This may be fixed in a
   future release.

   The generator is seeded from the current time on the first call.

   rand() is used unconditionally for the sake of portability and
   simplicity.  random() has been suggested as offering better
   randomness, but Wget does not need that.

   DO NOT use this for cryptographic purposes.  It is only meant to be
   used in situations where quality of the random numbers returned
   doesn't really matter.  */

int
random_number (int max)
{
  static int seeded;
  int raw;

  if (seeded == 0)
    {
      seeded = 1;
      srand (time (NULL));
    }
  raw = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  raw &= RAND_MAX;
#endif

  /* Scale RAW from [0, RAND_MAX] down to [0, MAX).  This plays the
     role of rand() % max, but the outcome is decided by the
     high-order bits, which are the better ones where rand() is a
     simple congruential generator.  */
  return (int) ((double)max * raw / (RAND_MAX + 1.0));
}
1674
/* Return a random uniformly distributed floating point number in the
   [0, 1) range.  The number is put together from three random groups
   of three decimal digits, which gives it a precision of 9 digits.

   Modify this to use erand48() where available!  */

double
random_float (void)
{
  double result = 0.0;
  double scale = 1000.0;
  int group;

  /* We can't rely on any specific value of RAND_MAX, but I'm pretty
     sure it's greater than 1000, so each group is drawn separately.
     The first group supplies the most significant digits.  */
  for (group = 0; group < 3; group++)
    {
      result += random_number (1000) / scale;
      scale *= 1000.0;
    }
  return result;
}
1690
#if 0
/* A debugging function for checking whether an MD5 library works.
   It is compiled out.

   Computes the MD5 digest of the zero-terminated string BUF and
   returns it as 32 hexadecimal digits.  The result lives in a static
   buffer that is overwritten by the next call.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];        /* the binary digest */
  static char res[33];          /* 32 hex digits and terminating zero */
  unsigned char *p1;
  char *p2;
  int cnt;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Turn each digest byte into two hex digits, high nibble first.  */
  p1 = raw;
  p2 = res;
  cnt = 16;
  while (cnt--)
    {
      *p2++ = XNUM_TO_digit (*p1 >> 4);
      *p2++ = XNUM_TO_digit (*p1 & 0xf);
      ++p1;
    }
  *p2 = '\0';

  return res;
}
#endif
1724 \f
1725 /* Implementation of run_with_timeout, a generic timeout-forcing
1726    routine for systems with Unix-like signal handling.  */
1727
1728 #ifdef USE_SIGNAL_TIMEOUT
1729 # ifdef HAVE_SIGSETJMP
1730 #  define SETJMP(env) sigsetjmp (env, 1)
1731
1732 static sigjmp_buf run_with_timeout_env;
1733
1734 static RETSIGTYPE
1735 abort_run_with_timeout (int sig)
1736 {
1737   assert (sig == SIGALRM);
1738   siglongjmp (run_with_timeout_env, -1);
1739 }
1740 # else /* not HAVE_SIGSETJMP */
1741 #  define SETJMP(env) setjmp (env)
1742
1743 static jmp_buf run_with_timeout_env;
1744
1745 static RETSIGTYPE
1746 abort_run_with_timeout (int sig)
1747 {
1748   assert (sig == SIGALRM);
1749   /* We don't have siglongjmp to preserve the set of blocked signals;
1750      if we longjumped out of the handler at this point, SIGALRM would
1751      remain blocked.  We must unblock it manually. */
1752   int mask = siggetmask ();
1753   mask &= ~sigmask (SIGALRM);
1754   sigsetmask (mask);
1755
1756   /* Now it's safe to longjump. */
1757   longjmp (run_with_timeout_env, -1);
1758 }
1759 # endif /* not HAVE_SIGSETJMP */
1760
/* Arrange for SIGALRM to be delivered in TIMEOUT seconds, through
   setitimer where ITIMER_REAL is available and through alarm
   otherwise.

   TIMEOUT should be non-zero.  A value so small that it would be
   rounded to zero is replaced with the least legal value (1us for
   setitimer, 1s for alarm), which ensures that SIGALRM will be
   delivered in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  long whole_secs = (long) timeout;

  xzero (itv);
  itv.it_value.tv_sec = whole_secs;
  itv.it_value.tv_usec = 1000000L * (timeout - whole_secs);

  /* Ensure that we wait for at least the minimum interval.
     Specifying zero would mean "wait forever".  */
  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
    itv.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface. */
  int secs = (int) timeout;

  /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is because
     alarm(0) means "never deliver the alarm", i.e. "wait forever",
     which is not what someone who specifies a 0.5s timeout would
     expect.  */
  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
1795
/* Undo alarm_set: disarm the pending alarm, using the same interface
   alarm_set used to arm it.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* An all-zero itimerval switches the timer off.  */
  struct itimerval off;
  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
1809
/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
   seconds.  Returns non-zero if the function was interrupted with a
   timeout, zero otherwise.  A TIMEOUT of zero means no limit: FUN is
   simply called and zero is returned.

   This works by setting up SIGALRM to be delivered in TIMEOUT seconds
   using setitimer() or alarm().  The timeout is enforced by
   longjumping out of the SIGALRM handler.  This has several
   advantages compared to the traditional approach of relying on
   signals causing system calls to exit with EINTR:

     * The callback function is *forcibly* interrupted after the
       timeout expires, (almost) regardless of what it was doing and
       whether it was in a syscall.  For example, a calculation that
       takes a long time is interrupted as reliably as an IO
       operation.

     * It works with both SYSV and BSD signals because it doesn't
       depend on the default setting of SA_RESTART.

     * It doesn't require special handler setup beyond a simple call
       to signal().  (It does use sigsetjmp/siglongjmp, but they're
       optional.)

   The only downside is that, if FUN allocates internal resources that
   are normally freed prior to exit from the functions, they will be
   lost in case of timeout.  */

int
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  int saved_errno;

  if (timeout == 0)
    {
      fun (arg);
      return 0;
    }

  /* The handler must be installed and the jump target recorded before
     the alarm is armed, so that the handler always has somewhere to
     jump to.  */
  signal (SIGALRM, abort_run_with_timeout);
  if (SETJMP (run_with_timeout_env) != 0)
    {
      /* Longjumped out of FUN with a timeout. */
      signal (SIGALRM, SIG_DFL);
      return 1;
    }
  alarm_set (timeout);
  fun (arg);

  /* FUN finished in time.  Disarm the alarm and restore the default
     SIGALRM handling.  Preserve errno in case alarm() or signal()
     modifies it. */
  saved_errno = errno;
  alarm_cancel ();
  signal (SIGALRM, SIG_DFL);
  errno = saved_errno;

  return 0;
}
1866
1867 #else  /* not USE_SIGNAL_TIMEOUT */
1868
#ifndef WINDOWS
/* Stand-in for run_with_timeout on systems where the signal-based
   implementation is not used.  TIMEOUT is not enforced: FUN(ARG) is
   simply called and 0 ("not interrupted") is returned.  This is not
   defined under Windows, because Windows has its own version of
   run_with_timeout that uses threads.  */

int
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (*fun) (arg);
  return 0;
}
#endif /* not WINDOWS */
1881 #endif /* not USE_SIGNAL_TIMEOUT */
1882 \f
1883 #ifndef WINDOWS
1884
/* Sleep the specified amount of seconds, which may include a
   fraction.  The best interface available is used: nanosleep, else
   usleep, else select, else plain sleep (whole seconds only).  On
   machines without nanosleep(), this may sleep shorter if interrupted
   by signals.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface because it offers high
     accuracy and, more importantly, because it allows us to reliably
     restart after having been interrupted by a signal such as
     SIGWINCH.  */
  struct timespec sleep, remaining;
  sleep.tv_sec = (long) seconds;
  sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);
  while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
    /* If nanosleep has been interrupted by a signal, adjust the
       sleeping period and return to sleep.  */
    sleep = remaining;
#else  /* not HAVE_NANOSLEEP */
#ifdef HAVE_USLEEP
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* POSIX only requires usleep to accept values below 1,000,000
         microseconds, and some systems reject anything larger.  Use
         sleep for the whole seconds first, then add usleep for
         subsecond accuracy.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000L);
#else  /* not HAVE_USLEEP */
#ifdef HAVE_SELECT
  struct timeval sleep;
  sleep.tv_sec = (long) seconds;
  sleep.tv_usec = 1000000L * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &sleep);
  /* If select returns -1 and errno is EINTR, it means we were
     interrupted by a signal.  But without knowing how long we've
     actually slept, we can't return to sleep.  Using gettimeofday to
     track sleeps is slow and unreliable due to clock skew.  */
#else  /* not HAVE_SELECT */
  sleep (seconds);
#endif /* not HAVE_SELECT */
#endif /* not HAVE_USLEEP */
#endif /* not HAVE_NANOSLEEP */
}
1932
1933 #endif /* not WINDOWS */