sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 #include "wget.h"
  63 #include "utils.h"
  64 #include "fnmatch.h"
  65 #include "hash.h"
  66
  67 #ifndef errno
  68 extern int errno;
  69 #endif
  70
  71 /* This section implements several wrappers around the basic
  72    allocation routines.  This is done for two reasons: first, so that
  73    the callers of these functions need not consistently check for
  74    errors.  If there is not enough virtual memory for running Wget,
  75    something is seriously wrong, and Wget exits with an appropriate
  76    error message.
  77
  78    The second reason why these are useful is that, if DEBUG_MALLOC is
  79    defined, they also provide a handy (if crude) malloc debugging
  80    interface that checks memory leaks.  */
  81
/* Report a fatal out-of-memory condition and terminate the program.

   WHAT is a short label for the allocation that failed (the callers
   in this file pass "malloc", "realloc" and "strdup"); it is printed
   after the program name.  This function does not return: it calls
   exit with status 1.  */
static void
memfatal (const char *what)
{
  /* Make sure we don't try to store part of the log line, and thus
     call malloc.  */
  log_set_save_context (0);
  logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  exit (1);
}
  93
  94 /* These functions end with _real because they need to be
  95    distinguished from the debugging functions, and from the macros.
  96    Explanation follows:
  97
  98    If memory debugging is not turned on, wget.h defines these:
  99
 100      #define xmalloc xmalloc_real
 101      #define xrealloc xrealloc_real
 102      #define xstrdup xstrdup_real
 103      #define xfree free
 104
 105    In case of memory debugging, the definitions are a bit more
 106    complex, because we want to provide more information, *and* we want
 107    to call the debugging code.  (The former is the reason why xmalloc
 108    and friends need to be macros in the first place.)  Then it looks
 109    like this:
 110
 111      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 112      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 113      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 114      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 115
 116    Each of the *_debug function does its magic and calls the real one.  */
 117
 118 #ifdef DEBUG_MALLOC
 119 # define STATIC_IF_DEBUG static
 120 #else
 121 # define STATIC_IF_DEBUG
 122 #endif
 123
 124 STATIC_IF_DEBUG void *
 125 xmalloc_real (size_t size)
 126 {
 127   void *ptr = malloc (size);
 128   if (!ptr)
 129     memfatal ("malloc");
 130   return ptr;
 131 }
 132
 133 STATIC_IF_DEBUG void *
 134 xrealloc_real (void *ptr, size_t newsize)
 135 {
 136   void *newptr;
 137
 138   /* Not all Un*xes have the feature of realloc() that calling it with
 139      a NULL-pointer is the same as malloc(), but it is easy to
 140      simulate.  */
 141   if (ptr)
 142     newptr = realloc (ptr, newsize);
 143   else
 144     newptr = malloc (newsize);
 145   if (!newptr)
 146     memfatal ("realloc");
 147   return newptr;
 148 }
 149
 150 STATIC_IF_DEBUG char *
 151 xstrdup_real (const char *s)
 152 {
 153   char *copy;
 154
 155 #ifndef HAVE_STRDUP
 156   int l = strlen (s);
 157   copy = malloc (l + 1);
 158   if (!copy)
 159     memfatal ("strdup");
 160   memcpy (copy, s, l + 1);
 161 #else  /* HAVE_STRDUP */
 162   copy = strdup (s);
 163   if (!copy)
 164     memfatal ("strdup");
 165 #endif /* HAVE_STRDUP */
 166
 167   return copy;
 168 }
 169
 170 #ifdef DEBUG_MALLOC
 171
 172 /* Crude home-grown routines for debugging some malloc-related
 173    problems.  Featured:
 174
 175    * Counting the number of malloc and free invocations, and reporting
 176      the "balance", i.e. how many times more malloc was called than it
 177      was the case with free.
 178
 179    * Making malloc store its entry into a simple array and free remove
 180      stuff from that array.  At the end, print the pointers which have
 181      not been freed, along with the source file and the line number.
 182      This also has the side-effect of detecting freeing memory that
 183      was never allocated.
 184
 185    Note that this kind of memory leak checking strongly depends on
 186    every malloc() being followed by a free(), even if the program is
 187    about to finish.  Wget is careful to free the data structure it
 188    allocated in init.c.  */
 189
/* Counters incremented by the *_debug allocation wrappers in this
   file; print_malloc_debug_stats reports both and their difference.  */
static int malloc_count, free_count;

/* Fixed-size table of registered allocations.  A slot whose PTR is
   NULL is considered free by register_ptr and unregister_ptr.  */
static struct {
  char *ptr;                    /* registered pointer; NULL = unused slot */
  const char *file;             /* source file given at registration */
  int line;                     /* source line given at registration */
} malloc_debug[100000];
 197
 198 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 199    which can be a real problem.  It would be nice to use a hash table
 200    for malloc_debug, but the functions in hash.c are not suitable
 201    because they can call malloc() themselves.  Maybe it would work if
 202    the hash table were preallocated to a huge size, and if we set the
 203    rehash threshold to 1.0.  */
 204
 205 /* Register PTR in malloc_debug.  Abort if this is not possible
 206    (presumably due to the number of current allocations exceeding the
 207    size of malloc_debug.)  */
 208
 209 static void
 210 register_ptr (void *ptr, const char *file, int line)
 211 {
 212   int i;
 213   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 214     if (malloc_debug[i].ptr == NULL)
 215       {
 216         malloc_debug[i].ptr = ptr;
 217         malloc_debug[i].file = file;
 218         malloc_debug[i].line = line;
 219         return;
 220       }
 221   abort ();
 222 }
 223
 224 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 225    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 226
 227 static void
 228 unregister_ptr (void *ptr)
 229 {
 230   int i;
 231   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 232     if (malloc_debug[i].ptr == ptr)
 233       {
 234         malloc_debug[i].ptr = NULL;
 235         return;
 236       }
 237   abort ();
 238 }
 239
 240 /* Print the malloc debug stats that can be gathered from the above
 241    information.  Currently this is the count of mallocs, frees, the
 242    difference between the two, and the dump of the contents of
 243    malloc_debug.  The last part are the memory leaks.  */
 244
 245 void
 246 print_malloc_debug_stats (void)
 247 {
 248   int i;
 249   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 250           malloc_count, free_count, malloc_count - free_count);
 251   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 252     if (malloc_debug[i].ptr != NULL)
 253       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 254               malloc_debug[i].file, malloc_debug[i].line);
 255 }
 256
 257 void *
 258 xmalloc_debug (size_t size, const char *source_file, int source_line)
 259 {
 260   void *ptr = xmalloc_real (size);
 261   ++malloc_count;
 262   register_ptr (ptr, source_file, source_line);
 263   return ptr;
 264 }
 265
 266 void
 267 xfree_debug (void *ptr, const char *source_file, int source_line)
 268 {
 269   assert (ptr != NULL);
 270   ++free_count;
 271   unregister_ptr (ptr);
 272   free (ptr);
 273 }
 274
 275 void *
 276 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 277 {
 278   void *newptr = xrealloc_real (ptr, newsize);
 279   if (!ptr)
 280     {
 281       ++malloc_count;
 282       register_ptr (newptr, source_file, source_line);
 283     }
 284   else if (newptr != ptr)
 285     {
 286       unregister_ptr (ptr);
 287       register_ptr (newptr, source_file, source_line);
 288     }
 289   return newptr;
 290 }
 291
 292 char *
 293 xstrdup_debug (const char *s, const char *source_file, int source_line)
 294 {
 295   char *copy = xstrdup_real (s);
 296   ++malloc_count;
 297   register_ptr (copy, source_file, source_line);
 298   return copy;
 299 }
 300
 301 #endif /* DEBUG_MALLOC */
 302 \f
 303 /* Utility function: like xstrdup(), but also lowercases S.  */
 304
 305 char *
 306 xstrdup_lower (const char *s)
 307 {
 308   char *copy = xstrdup (s);
 309   char *p = copy;
 310   for (; *p; p++)
 311     *p = TOLOWER (*p);
 312   return copy;
 313 }
 314
 315 /* Return a count of how many times CHR occurs in STRING. */
 316
 317 int
 318 count_char (const char *string, char chr)
 319 {
 320   const char *p;
 321   int count = 0;
 322   for (p = string; *p; p++)
 323     if (*p == chr)
 324       ++count;
 325   return count;
 326 }
 327
 328 /* Copy the string formed by two pointers (one on the beginning, other
 329    on the char after the last char) to a new, malloc-ed location.
 330    0-terminate it.  */
 331 char *
 332 strdupdelim (const char *beg, const char *end)
 333 {
 334   char *res = (char *)xmalloc (end - beg + 1);
 335   memcpy (res, beg, end - beg);
 336   res[end - beg] = '\0';
 337   return res;
 338 }
 339
 340 /* Parse a string containing comma-separated elements, and return a
 341    vector of char pointers with the elements.  Spaces following the
 342    commas are ignored.  */
 343 char **
 344 sepstring (const char *s)
 345 {
 346   char **res;
 347   const char *p;
 348   int i = 0;
 349
 350   if (!s || !*s)
 351     return NULL;
 352   res = NULL;
 353   p = s;
 354   while (*s)
 355     {
 356       if (*s == ',')
 357         {
 358           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 359           res[i] = strdupdelim (p, s);
 360           res[++i] = NULL;
 361           ++s;
 362           /* Skip the blanks following the ','.  */
 363           while (ISSPACE (*s))
 364             ++s;
 365           p = s;
 366         }
 367       else
 368         ++s;
 369     }
 370   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 371   res[i] = strdupdelim (p, s);
 372   res[i + 1] = NULL;
 373   return res;
 374 }
 375 \f
 376 /* Return pointer to a static char[] buffer in which zero-terminated
 377    string-representation of TM (in form hh:mm:ss) is printed.
 378
 379    If TM is non-NULL, the current time-in-seconds will be stored
 380    there.
 381
 382    (#### This is misleading: one would expect TM would be used instead
 383    of the current time in that case.  This design was probably
 384    influenced by the design time(2), and should be changed at some
 385    points.  No callers use non-NULL TM anyway.)  */
 386
 387 char *
 388 time_str (time_t *tm)
 389 {
 390   static char output[15];
 391   struct tm *ptm;
 392   time_t secs = time (tm);
 393
 394   if (secs == -1)
 395     {
 396       /* In case of error, return the empty string.  Maybe we should
 397          just abort if this happens?  */
 398       *output = '\0';
 399       return output;
 400     }
 401   ptm = localtime (&secs);
 402   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 403   return output;
 404 }
 405
 406 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 407
 408 char *
 409 datetime_str (time_t *tm)
 410 {
 411   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 412   struct tm *ptm;
 413   time_t secs = time (tm);
 414
 415   if (secs == -1)
 416     {
 417       /* In case of error, return the empty string.  Maybe we should
 418          just abort if this happens?  */
 419       *output = '\0';
 420       return output;
 421     }
 422   ptm = localtime (&secs);
 423   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 424            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 425            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 426   return output;
 427 }
 428 \f
 429 /* The Windows versions of the following two functions are defined in
 430    mswindows.c.  */
 431
 432 #ifndef WINDOWS
/* Continue running in a forked child while the original process
   exits.

   If no log file name is set in opt.lfilename, one is chosen with
   unique_name (DEFAULT_LOGFILE) before forking.  The parent prints
   the child's pid (and the chosen log file name, if it was chosen
   here) and exits with status 0; if fork() fails it prints the error
   and exits with status 1.  Only the child returns from this
   function.  */
void
fork_to_background (void)
{
  pid_t pid;
  /* Whether we arrange our own version of opt.lfilename here.  */
  int changedp = 0;

  if (!opt.lfilename)
    {
      opt.lfilename = unique_name (DEFAULT_LOGFILE);
      changedp = 1;
    }
  pid = fork ();
  if (pid < 0)
    {
      /* parent, error */
      perror ("fork");
      exit (1);
    }
  else if (pid != 0)
    {
      /* parent, no error */
      printf (_("Continuing in background, pid %d.\n"), (int)pid);
      if (changedp)
        printf (_("Output will be written to `%s'.\n"), opt.lfilename);
      exit (0);                 /* #### should we use _exit()? */
    }

  /* child: start a new session and reopen the three standard streams
     on /dev/null, then keep running.  The return values of setsid()
     and freopen() are not checked.  */
  setsid ();
  freopen ("/dev/null", "r", stdin);
  freopen ("/dev/null", "w", stdout);
  freopen ("/dev/null", "w", stderr);
}
 467 #endif /* not WINDOWS */
 468 \f
/* "Touch" FILE, i.e. set both its access time and its modification
   time to TM by calling utime().  A failure is logged at
   LOG_NOTQUIET level together with the strerror() text; nothing is
   reported to the caller.  */
void
touch (const char *file, time_t tm)
{
  /* Build the argument for utime(): a struct utimbuf where
     HAVE_STRUCT_UTIMBUF is defined, otherwise a two-element time_t
     array holding the same value twice.  */
#ifdef HAVE_STRUCT_UTIMBUF
  struct utimbuf times;
  times.actime = times.modtime = tm;
#else
  time_t times[2];
  times[0] = times[1] = tm;
#endif

  if (utime (file, &times) == -1)
    logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
}
 485
 486 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 487    nothing under MS-Windows.  */
 488 int
 489 remove_link (const char *file)
 490 {
 491   int err = 0;
 492   struct stat st;
 493
 494   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 495     {
 496       DEBUGP (("Unlinking %s (symlink).\n", file));
 497       err = unlink (file);
 498       if (err != 0)
 499         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 500                    file, strerror (errno));
 501     }
 502   return err;
 503 }
 504
/* Does FILENAME exist?  Returns non-zero if it does, zero otherwise.

   This is quite a lousy implementation, since it supplies no error
   codes -- only a yes-or-no answer.  Thus it will report that a file
   does not exist if, e.g., the directory is unreadable.  The proper
   way would be to have a third, error state besides true/false, but
   that would introduce uncalled-for additional complexity in the
   callers.  */
int
file_exists_p (const char *filename)
{
  /* Use access() with F_OK where HAVE_ACCESS is defined; otherwise
     fall back to checking whether stat() succeeds.  */
#ifdef HAVE_ACCESS
  return access (filename, F_OK) >= 0;
#else
  struct stat buf;
  return stat (filename, &buf) >= 0;
#endif
}
 522
 523 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 524    Returns 0 on error.  */
 525 int
 526 file_non_directory_p (const char *path)
 527 {
 528   struct stat buf;
 529   /* Use lstat() rather than stat() so that symbolic links pointing to
 530      directories can be identified correctly.  */
 531   if (lstat (path, &buf) != 0)
 532     return 0;
 533   return S_ISDIR (buf.st_mode) ? 0 : 1;
 534 }
 535
 536 /* Return a unique filename, given a prefix and count */
 537 static char *
 538 unique_name_1 (const char *fileprefix, int count)
 539 {
 540   char *filename;
 541
 542   if (count)
 543     {
 544       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 545       sprintf (filename, "%s.%d", fileprefix, count);
 546     }
 547   else
 548     filename = xstrdup (fileprefix);
 549
 550   if (!file_exists_p (filename))
 551     return filename;
 552   else
 553     {
 554       xfree (filename);
 555       return NULL;
 556     }
 557 }
 558
 559 /* Return a unique file name, based on PREFIX.  */
 560 char *
 561 unique_name (const char *prefix)
 562 {
 563   char *file = NULL;
 564   int count = 0;
 565
 566   while (!file)
 567     file = unique_name_1 (prefix, count++);
 568   return file;
 569 }
 570 \f
/* Create DIRECTORY.  If some of the pathname components of DIRECTORY
   are missing, create them first.

   The value returned is the status of the last component processed:
   the result of its mkdir() call, or 0 if that component already
   exists.  Failures on intermediate components are not reported on
   their own; they are overwritten by the result for the following
   component.

   The behaviour of this function is intended to be identical to the
   behaviour of `mkdir -p' on systems where mkdir supports the `-p'
   option.  */
int
make_directory (const char *directory)
{
  int quit = 0;
  int i;
  int ret = 0;
  char *dir;

  /* Make a copy of dir, to be able to write to it.  Otherwise, the
     function is unsafe if called with a read-only char *argument.  */
  STRDUP_ALLOCA (dir, directory);

  /* If the first character of dir is '/', skip it (and thus enable
     creation of absolute-pathname directories).  */
  for (i = (*dir == '/'); 1; ++i)
    {
      /* Advance I to the next '/' or to the end of the string.  */
      for (; dir[i] && dir[i] != '/'; i++)
        ;
      if (!dir[i])
        quit = 1;
      /* Temporarily cut the path here so DIR names the prefix that
         ends at the current component.  */
      dir[i] = '\0';
      /* Check whether the directory already exists.  Allow creation
         of intermediate directories to fail, as the initial path
         components are not necessarily directories!  */
      if (!file_exists_p (dir))
        ret = mkdir (dir, 0777);
      else
        ret = 0;
      if (quit)
        break;
      else
        /* Restore the separator and go on to the next component.  */
        dir[i] = '/';
    }
  return ret;
}
 612
 613 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 614    should be a file name.
 615
 616    file_merge("/foo/bar", "baz")  => "/foo/baz"
 617    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 618    file_merge("foo", "bar")       => "bar"
 619
 620    In other words, it's a simpler and gentler version of uri_merge_1.  */
 621
 622 char *
 623 file_merge (const char *base, const char *file)
 624 {
 625   char *result;
 626   const char *cut = (const char *)strrchr (base, '/');
 627
 628   if (!cut)
 629     return xstrdup (file);
 630
 631   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 632   memcpy (result, base, cut - base);
 633   result[cut - base] = '/';
 634   strcpy (result + (cut - base) + 1, file);
 635
 636   return result;
 637 }
 638 \f
 639 static int in_acclist PARAMS ((const char *const *, const char *, int));
 640
 641 /* Determine whether a file is acceptable to be followed, according to
 642    lists of patterns to accept/reject.  */
 643 int
 644 acceptable (const char *s)
 645 {
 646   int l = strlen (s);
 647
 648   while (l && s[l] != '/')
 649     --l;
 650   if (s[l] == '/')
 651     s += (l + 1);
 652   if (opt.accepts)
 653     {
 654       if (opt.rejects)
 655         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 656                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 657       else
 658         return in_acclist ((const char *const *)opt.accepts, s, 1);
 659     }
 660   else if (opt.rejects)
 661     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 662   return 1;
 663 }
 664
 665 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 666    `/something', frontcmp() will return 1 only if S2 begins with
 667    `/something'.  Otherwise, 0 is returned.  */
 668 int
 669 frontcmp (const char *s1, const char *s2)
 670 {
 671   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 672   return !*s1;
 673 }
 674
 675 /* Iterate through STRLIST, and return the first element that matches
 676    S, through wildcards or front comparison (as appropriate).  */
 677 static char *
 678 proclist (char **strlist, const char *s, enum accd flags)
 679 {
 680   char **x;
 681
 682   for (x = strlist; *x; x++)
 683     if (has_wildcards_p (*x))
 684       {
 685         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 686           break;
 687       }
 688     else
 689       {
 690         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 691         if (frontcmp (p, s))
 692           break;
 693       }
 694   return *x;
 695 }
 696
 697 /* Returns whether DIRECTORY is acceptable for download, wrt the
 698    include/exclude lists.
 699
 700    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 701    and absolute paths may be freely intermixed.  */
 702 int
 703 accdir (const char *directory, enum accd flags)
 704 {
 705   /* Remove starting '/'.  */
 706   if (flags & ALLABS && *directory == '/')
 707     ++directory;
 708   if (opt.includes)
 709     {
 710       if (!proclist (opt.includes, directory, flags))
 711         return 0;
 712     }
 713   if (opt.excludes)
 714     {
 715       if (proclist (opt.excludes, directory, flags))
 716         return 0;
 717     }
 718   return 1;
 719 }
 720
 721 /* Match the end of STRING against PATTERN.  For instance:
 722
 723    match_backwards ("abc", "bc") -> 1
 724    match_backwards ("abc", "ab") -> 0
 725    match_backwards ("abc", "abc") -> 1 */
 726 int
 727 match_tail (const char *string, const char *pattern)
 728 {
 729   int i, j;
 730
 731   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 732     if (string[i] != pattern[j])
 733       break;
 734   /* If the pattern was exhausted, the match was succesful.  */
 735   if (j == -1)
 736     return 1;
 737   else
 738     return 0;
 739 }
 740
 741 /* Checks whether string S matches each element of ACCEPTS.  A list
 742    element are matched either with fnmatch() or match_tail(),
 743    according to whether the element contains wildcards or not.
 744
 745    If the BACKWARD is 0, don't do backward comparison -- just compare
 746    them normally.  */
 747 static int
 748 in_acclist (const char *const *accepts, const char *s, int backward)
 749 {
 750   for (; *accepts; accepts++)
 751     {
 752       if (has_wildcards_p (*accepts))
 753         {
 754           /* fnmatch returns 0 if the pattern *does* match the
 755              string.  */
 756           if (fnmatch (*accepts, s, 0) == 0)
 757             return 1;
 758         }
 759       else
 760         {
 761           if (backward)
 762             {
 763               if (match_tail (s, *accepts))
 764                 return 1;
 765             }
 766           else
 767             {
 768               if (!strcmp (s, *accepts))
 769                 return 1;
 770             }
 771         }
 772     }
 773   return 0;
 774 }
 775
 776 /* Return the location of STR's suffix (file extension).  Examples:
 777    suffix ("foo.bar")       -> "bar"
 778    suffix ("foo.bar.baz")   -> "baz"
 779    suffix ("/foo/bar")      -> NULL
 780    suffix ("/foo.bar/baz")  -> NULL  */
 781 char *
 782 suffix (const char *str)
 783 {
 784   int i;
 785
 786   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 787     ;
 788
 789   if (str[i++] == '.')
 790     return (char *)str + i;
 791   else
 792     return NULL;
 793 }
 794
/* Read a line from FP and return a pointer to freshly allocated
   storage holding it.  The storage is obtained through xmalloc() and
   xrealloc() and should be released by the caller when it is no
   longer needed.

   The length of the line is not limited, except by available memory.
   The newline character at the end of line is retained.  The line is
   terminated with a zero character.

   After end-of-file is encountered without anything being read, NULL
   is returned.  NULL is also returned on error.  To distinguish
   between these two cases, use the stdio function ferror().  */

char *
read_whole_line (FILE *fp)
{
  int length = 0;               /* characters collected so far */
  int bufsize = 82;             /* current size of the LINE buffer */
  char *line = (char *)xmalloc (bufsize);

  /* Each fgets() call appends to what has been collected so far.  */
  while (fgets (line + length, bufsize - length, fp))
    {
      length += strlen (line + length);
      if (length == 0)
        /* Possible for example when reading from a binary file where
           a line begins with \0.  */
        continue;

      /* A trailing newline means the line is complete.  */
      if (line[length - 1] == '\n')
        break;

      /* fgets() guarantees to read the whole line, or to use up the
         space we've given it.  We can double the buffer
         unconditionally.  */
      bufsize <<= 1;
      line = xrealloc (line, bufsize);
    }
  /* Nothing was read, or the stream is in an error state: release the
     buffer and report failure.  */
  if (length == 0 || ferror (fp))
    {
      xfree (line);
      return NULL;
    }
  if (length + 1 < bufsize)
    /* Relieve the memory from our exponential greediness.  We say
       `length + 1' because the terminating \0 is not included in
       LENGTH.  We don't need to zero-terminate the string ourselves,
       though, because fgets() does that.  */
    line = xrealloc (line, length + 1);
  return line;
}
 844 \f
/* Read FILE into memory.  A pointer to a `struct file_memory' is
   returned; use the struct element `content' to access the file
   contents, and the element `length' to know the file length.
   `content' is *not* zero-terminated, and you should *not* read or
   write beyond the [0, length) range of characters.

   NULL is returned if FILE cannot be opened or if reading it fails.

   After you are done with the file contents, call read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, read_file() either mmap's the file into memory, or
   reads the file into the core using read().

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.  */

struct file_memory *
read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD belongs to stdin, which must not be closed here.  */
  int inhibit_close = 0;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = 1;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xmalloc (sizeof (struct file_memory));

#ifdef HAVE_MMAP
  {
    struct stat buf;
    /* The file size is needed as the length of the mapping.  */
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  */
#endif /* HAVE_MMAP */

  fm->length = 0;
  size = 512;                   /* number of bytes fm->content can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      long nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Fill as much of the remaining buffer space as read() gives.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* A read() error: release everything acquired above and fail.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}
 965
/* Release the resources held by FM.  Specifically, this calls
   munmap() or xfree() on fm->content, depending on whether mmap or
   malloc/read were used to read in the file (as recorded in
   fm->mmap_p).  It also frees the memory needed to hold the FM
   structure itself, so FM must not be used after the call.  */

void
read_file_free (struct file_memory *fm)
{
  /* Without HAVE_MMAP only the xfree() branch below is compiled, and
     it runs unconditionally.  */
#ifdef HAVE_MMAP
  if (fm->mmap_p)
    {
      munmap (fm->content, fm->length);
    }
  else
#endif
    {
      xfree (fm->content);
    }
  xfree (fm);
}
 986 \f
 987 /* Free the pointers in a NULL-terminated vector of pointers, then
 988    free the pointer itself.  */
 989 void
 990 free_vec (char **vec)
 991 {
 992   if (vec)
 993     {
 994       char **p = vec;
 995       while (*p)
 996         xfree (*p++);
 997       xfree (vec);
 998     }
 999 }
1000
/* Append vector V2 to vector V1 and return the merged vector.  Both
   are NULL-terminated vectors of strings.  The function frees V2 and
   reallocates V1, so neither pointer may be used after the call; use
   the returned pointer instead.  The strings themselves are not
   copied: their pointers are moved into the merged vector.

   If V1 is NULL, V2 is returned; if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++);
  /* Count v2.  */
  for (j = 0; v2[j]; j++);
  /* Reallocate v1.  The elements are of type `char *', so that is
     the size to multiply by (not `char **').  The extra slot holds
     the terminating NULL.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy v2's elements together with its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1029
1030 /* A set of simple-minded routines to store strings in a linked list.
1031    This used to also be used for searching, but now we have hash
1032    tables for that.  */
1033
1034 /* It's a shame that these simple things like linked lists and hash
1035    tables (see hash.c) need to be implemented over and over again.  It
1036    would be nice to be able to use the routines from glib -- see
1037    www.gtk.org for details.  However, that would make Wget depend on
1038    glib, and I want to avoid dependencies to external libraries for
1039    reasons of convenience and portability (I suspect Wget is more
1040    portable than anything ever written for Gnome).  */
1041
1042 /* Append an element to the list.  If the list has a huge number of
1043    elements, this can get slow because it has to find the list's
1044    ending.  If you think you have to call slist_append in a loop,
1045    think about calling slist_prepend() followed by slist_nreverse().  */
1046
1047 slist *
1048 slist_append (slist *l, const char *s)
1049 {
1050   slist *newel = (slist *)xmalloc (sizeof (slist));
1051   slist *beg = l;
1052
1053   newel->string = xstrdup (s);
1054   newel->next = NULL;
1055
1056   if (!l)
1057     return newel;
1058   /* Find the last element.  */
1059   while (l->next)
1060     l = l->next;
1061   l->next = newel;
1062   return beg;
1063 }
1064
1065 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1066
1067 slist *
1068 slist_prepend (slist *l, const char *s)
1069 {
1070   slist *newel = (slist *)xmalloc (sizeof (slist));
1071   newel->string = xstrdup (s);
1072   newel->next = l;
1073   return newel;
1074 }
1075
1076 /* Destructively reverse L. */
1077
1078 slist *
1079 slist_nreverse (slist *l)
1080 {
1081   slist *prev = NULL;
1082   while (l)
1083     {
1084       slist *next = l->next;
1085       l->next = prev;
1086       prev = l;
1087       l = next;
1088     }
1089   return prev;
1090 }
1091
1092 /* Is there a specific entry in the list?  */
1093 int
1094 slist_contains (slist *l, const char *s)
1095 {
1096   for (; l; l = l->next)
1097     if (!strcmp (l->string, s))
1098       return 1;
1099   return 0;
1100 }
1101
1102 /* Free the whole slist.  */
1103 void
1104 slist_free (slist *l)
1105 {
1106   while (l)
1107     {
1108       slist *n = l->next;
1109       xfree (l->string);
1110       xfree (l);
1111       l = n;
1112     }
1113 }
1114 \f
1115 /* Sometimes it's useful to create "sets" of strings, i.e. special
1116    hash tables where you want to store strings as keys and merely
1117    query for their existence.  Here is a set of utility routines that
1118    makes that transparent.  */
1119
/* Add S to the string set HT.  A private copy of S is stored as the
   key, unless the set already contains S, in which case nothing is
   done (this avoids having to free the old key and duplicate a new
   one).  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* The value stored with each key is the constant string "1".  It
     is a clear arbitrary value and costs no memory, since the same
     literal is shared by all entries of all `set' hash tables.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1135
/* Test whether the string set HT contains S.  This simply forwards
   to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}
1143
/* Mapper used by string_set_free: release KEY.  The value and the
   extra argument are not used.  Always returns 0 (presumably telling
   hash_table_map to keep going -- see hash.c).  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}
1150
1151 void
1152 string_set_free (struct hash_table *ht)
1153 {
1154   hash_table_map (ht, string_set_free_mapper, NULL);
1155   hash_table_destroy (ht);
1156 }
1157
/* Mapper used by free_keys_and_values: release both KEY and VALUE.
   The extra argument is not used.  Always returns 0 (presumably
   telling hash_table_map to keep going -- see hash.c).  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}
1165
1166 /* Another utility function: call free() on all keys and values of HT.  */
1167
1168 void
1169 free_keys_and_values (struct hash_table *ht)
1170 {
1171   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1172 }
1173
1174 \f
/* Worker shared by legible and legible_very_long.  REPR is the
   decimal representation of a number, optionally preceded by `-'.
   Return a copy of it with a `,' inserted between every group of
   three digits, counting from the right (e.g. "1234567" becomes
   "1,234,567").

   The result lives in a static buffer that is overwritten by the
   next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *out = outbuf;
  const char *digits = repr;
  int remaining;

  /* A leading minus sign is copied through and is not counted as
     part of any digit group.  */
  if (*digits == '-')
    *out++ = *digits++;

  /* Copy the digits one by one.  REMAINING counts the characters not
     yet copied, including the current one; a separator is due after
     the current character whenever what follows it is a non-empty
     whole number of three-digit groups.  */
  for (remaining = strlen (digits); remaining > 0; remaining--)
    {
      *out++ = *digits++;
      if (remaining > 1 && remaining % 3 == 1)
        *out++ = ',';
    }

  /* Zero-terminate the string.  */
  *out = '\0';
  return outbuf;
}
1212
/* Return the legible form (digits grouped by commas) of the long L.
   The returned pointer refers to the static buffer of legible_1, so
   it is only valid until the next call.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert L to its plain decimal form first, then group it.  */
  number_to_string (digits, l);
  return legible_1 (digits);
}
1222
1223 /* Write a string representation of NUMBER into the provided buffer.
1224    We cannot use sprintf() because we cannot be sure whether the
1225    platform supports printing of what we chose for VERY_LONG_TYPE.
1226
1227    Example: Gcc supports `long long' under many platforms, but on many
1228    of those the native libc knows nothing of it and therefore cannot
1229    print it.
1230
1231    How long BUFFER needs to be depends on the platform and the content
1232    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1233    bytes are sufficient.  Using more might be a good idea.
1234
1235    This function does not go through the hoops that long_to_string
1236    goes to because it doesn't aspire to be fast.  (It's called perhaps
1237    once in a Wget run.)  */
1238
1239 static void
1240 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1241 {
1242   int i = 0;
1243   int j;
1244
1245   /* Print the number backwards... */
1246   do
1247     {
1248       buffer[i++] = '0' + number % 10;
1249       number /= 10;
1250     }
1251   while (number);
1252
1253   /* ...and reverse the order of the digits. */
1254   for (j = 0; j < i / 2; j++)
1255     {
1256       char c = buffer[j];
1257       buffer[j] = buffer[i - 1 - j];
1258       buffer[i - 1 - j] = c;
1259     }
1260   buffer[i] = '\0';
1261 }
1262
1263 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1264 char *
1265 legible_very_long (VERY_LONG_TYPE l)
1266 {
1267   char inbuf[128];
1268   /* Print the number into the buffer.  */
1269   very_long_to_string (inbuf, l);
1270   return legible_1 (inbuf);
1271 }
1272
/* Count the characters needed to print the (long) integer NUMBER in
   base 10: its digits, plus one for the `-' sign when NUMBER is
   negative.  The result does not include the terminating '\0'.  */
int
numdigit (long number)
{
  int cnt = 1;
  unsigned long magnitude;

  if (number < 0)
    {
      /* Negate in unsigned arithmetic: negating the most negative
         long as a signed value would overflow.  */
      magnitude = -(unsigned long) number;
      ++cnt;
    }
  else
    magnitude = (unsigned long) number;

  while ((magnitude /= 10) > 0)
    ++cnt;
  return cnt;
}
1287
/* Helper macros for number_to_string.  They operate on the local
   variables P (output cursor) and N (remaining magnitude) of that
   function.

   ONE_DIGIT emits the digit of N at decimal position FIGURE (a power
   of ten); ONE_DIGIT_ADVANCE additionally strips that digit from N.
   DIGITS_<k> (FIGURE) emits K digits, starting at position FIGURE.
   DIGITS_<k> for k > 1 expands to several statements and must
   therefore be used inside braces.  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This should be completely
   equivalent to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be big enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit longs the maximum
   needed size is 24 bytes.  That includes the digits needed for the
   largest 64-bit number, the `-' sign in case it's negative, and the
   terminating '\0'.

   The magnitude of NUMBER is computed in unsigned arithmetic, so the
   most negative long is printed correctly as well.  */

char *
number_to_string (char *buffer, long number)
{
  char *p = buffer;

#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
  p += strlen (buffer);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Negating as unsigned is well-defined even for the most
         negative long, whose magnitude does not fit in a long.  */
      n = -(unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Dispatch on the number of digits; each branch emits exactly that
     many digits, most significant first.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */

  return p;
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1404 \f
/* Support for timers. */

/* Select the timer implementation.  Exactly one of TIMER_WINDOWS,
   TIMER_GETTIMEOFDAY and TIMER_TIME ends up defined:

     TIMER_WINDOWS       when WINDOWS is defined;
     TIMER_GETTIMEOFDAY  otherwise, when gettimeofday() is available
                         (virtually all modern Unix systems);
     TIMER_TIME          otherwise -- the catch-all for non-Windows
                         systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else  /* neither WINDOWS nor HAVE_GETTIMEOFDAY */
# define TIMER_TIME
#endif
1430
/* The starting point of a timer, as recorded by wtimer_reset().  Only
   the fields belonging to the selected timer implementation exist.  */
struct wget_timer {
#ifdef TIMER_WINDOWS
  /* System time at reset, converted from a FILETIME.  */
  ULARGE_INTEGER wintime;
#endif

#ifdef TIMER_TIME
  /* Value of time() at reset.  */
  time_t secs;
#endif

#ifdef TIMER_GETTIMEOFDAY
  /* The tv_sec and tv_usec values gettimeofday() reported at reset.  */
  long secs;
  long usecs;
#endif
};
1445
1446 /* Allocate a timer.  It is not legal to do anything with a freshly
1447    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1448
1449 struct wget_timer *
1450 wtimer_allocate (void)
1451 {
1452   struct wget_timer *wt =
1453     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1454   return wt;
1455 }
1456
/* Convenience constructor: allocate a timer with wtimer_allocate(),
   reset it with wtimer_reset() so that it starts counting from now,
   and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1466
/* Release the memory occupied by timer WT.  The timer must not be
   used in any way after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1475
/* (Re)start timer WT by recording the current time in it.  The
   recorded time is the starting point from which wtimer_elapsed()
   measures.  A timer that has already been used may be reset
   again.  */

void
wtimer_reset (struct wget_timer *wt)
{
#ifdef TIMER_WINDOWS
  SYSTEMTIME systime;
  FILETIME filetime;

  /* Take the system time and convert it to a FILETIME, whose two
     halves are stored into the 64-bit WINTIME.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.LowPart  = filetime.dwLowDateTime;
  wt->wintime.HighPart = filetime.dwHighDateTime;
#endif

#ifdef TIMER_GETTIMEOFDAY
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->usecs = now.tv_usec;
  wt->secs  = now.tv_sec;
#endif

#ifdef TIMER_TIME
  wt->secs = time (NULL);
#endif
}
1503
/* Return the number of milliseconds elapsed since the timer WT was
   last reset with wtimer_reset().  It is allowed to call this
   function more than once on the same timer to get increasingly
   higher elapsed values; the timer itself is not modified.

   Which clock is consulted depends on the timer implementation
   selected at compile time (TIMER_GETTIMEOFDAY, TIMER_TIME or
   TIMER_WINDOWS), matching what wtimer_reset() recorded.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds are scaled up to milliseconds, microseconds are
     scaled down.  The microsecond difference may be negative, in
     which case it reduces the contribution of the seconds.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() only has one-second resolution, so the result is always
     a multiple of 1000.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* The difference is in FILETIME units (100 nanoseconds each);
     dividing by 10000 yields milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1533
/* Return the assumed granularity, in milliseconds, of the timer
   implementation in use.  Code that has to deal with "zero" time
   intervals uses this to judge how small a measurable interval can
   be.  */

long
wtimer_granularity (void)
{
#ifdef TIMER_TIME
  /* time() counts whole seconds.  */
  return 1000;
#endif

#if defined (TIMER_GETTIMEOFDAY) || defined (TIMER_WINDOWS)
  /* The granularity of gettimeofday is hugely architecture-dependent,
     but on modern machines it appears to be better than 1ms.  For
     the Windows timer the value is a guess.  */
  return 1;
#endif
}
1558 \f
/* This should probably be at a better place, but it doesn't really
   fit into html-parse.c.  */

/* Return a freshly allocated copy of S in which the characters that
   are special in HTML are replaced by entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  The result is
   allocated with xmalloc.  */
char *
html_quote_string (const char *s)
{
  const char *in;
  char *out, *quoted;
  int len = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (in = s; *in; in++)
    switch (*in)
      {
      case '&':
      case ' ':
        len += 5;               /* `&amp;' and `&#32;' */
        break;
      case '<':
      case '>':
        len += 4;               /* `&lt;' and `&gt;' */
        break;
      case '\"':
        len += 6;               /* `&quot;' */
        break;
      default:
        len += 1;
      }
  quoted = (char *)xmalloc (len + 1);

  /* Second pass: copy S, expanding the special characters.  */
  out = quoted;
  for (in = s; *in; in++)
    {
      const char *entity;

      if (*in == '&')
        entity = "&amp;";
      else if (*in == '<')
        entity = "&lt;";
      else if (*in == '>')
        entity = "&gt;";
      else if (*in == '\"')
        entity = "&quot;";
      else if (*in == ' ')
        entity = "&#32;";
      else
        {
          /* An ordinary character.  */
          *out++ = *in;
          continue;
        }
      while (*entity)
        *out++ = *entity++;
    }
  *out = '\0';
  return quoted;
}
1633
/* Return the width, in columns, of the terminal that stderr is
   attached to, or 0 when the width cannot be determined.  0 is also
   returned, without querying the terminal, when opt.lfilename is
   set.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize ws;

  /* A failing ioctl most likely means ENOTTY, i.e. stderr is not a
     terminal.  */
  if (opt.lfilename == NULL
      && ioctl (fileno (stderr), TIOCGWINSZ, &ws) >= 0)
    return ws.ws_col;
#endif /* TIOCGWINSZ */
  return 0;
}
1658
/* Return a random number in the range [0, MAX-1].

   If MAX is greater than the value of RAND_MAX+1 on the system, the
   returned value will be in the range [0, RAND_MAX].  This may be
   fixed in a future release.

   The generator is seeded with the current time on the first call.

   rand() is used for portability.  random() has been suggested as
   offering better randomness, but Wget does not need that, so the
   simpler choice of using rand unconditionally was made.  */

int
random_number (int max)
{
  static int initialized;
  double scaled;
  int sample;

  /* Seed the generator exactly once.  */
  if (!initialized)
    {
      srand (time (NULL));
      initialized = 1;
    }
  sample = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  sample &= RAND_MAX;
#endif

  /* Scale the sample down to [0, MAX) instead of computing
     rand() % max: this draws on the high-order bits, which are more
     random on architectures where rand() is a simple congruential
     generator.  */
  scaled = (double)max * sample / (RAND_MAX + 1.0);
  return (int)scaled;
}
1701
#if 0
/* Debugging aid for checking whether an MD5 library works: compute
   the MD5 digest of the string BUF and return it as 32 hexadecimal
   characters in a static, zero-terminated buffer.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  static char res[33];
  unsigned char raw[16];
  char *hex = res;
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Each digest byte yields two hex digits, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      *hex++ = XDIGIT_TO_xchar (raw[i] >> 4);
      *hex++ = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  *hex = '\0';

  return res;
}
#endif