sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 /* Needed for run_with_timeout. */
  63 #undef USE_SIGNAL_TIMEOUT
  64 #ifdef HAVE_SIGNAL_H
  65 # include <signal.h>
  66 #endif
  67 #ifdef HAVE_SETJMP_H
  68 # include <setjmp.h>
  69 #endif
  70 /* If sigsetjmp is a macro, configure won't pick it up. */
  71 #ifdef sigsetjmp
  72 # define HAVE_SIGSETJMP
  73 #endif
  74 #ifdef HAVE_SIGNAL
  75 # ifdef HAVE_SIGSETJMP
  76 #  define USE_SIGNAL_TIMEOUT
  77 # endif
  78 # ifdef HAVE_SIGBLOCK
  79 #  define USE_SIGNAL_TIMEOUT
  80 # endif
  81 #endif
  82
  83 #include "wget.h"
  84 #include "utils.h"
  85 #include "fnmatch.h"
  86 #include "hash.h"
  87
  88 #ifndef errno
  89 extern int errno;
  90 #endif
  91
  92 /* This section implements several wrappers around the basic
  93    allocation routines.  This is done for two reasons: first, so that
  94    the callers of these functions need not consistently check for
  95    errors.  If there is not enough virtual memory for running Wget,
  96    something is seriously wrong, and Wget exits with an appropriate
  97    error message.
  98
  99    The second reason why these are useful is that, if DEBUG_MALLOC is
 100    defined, they also provide a handy (if crude) malloc debugging
 101    interface that checks memory leaks.  */
 102
/* Croak the fatal memory error and bail out with non-zero exit
   status.  WHAT names the allocation routine that failed and is
   included in the logged message.  This function does not return.  */
static void
memfatal (const char *what)
{
  /* Make sure we don't try to store part of the log line, and thus
     call malloc.  */
  log_set_save_context (0);
  logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  exit (1);
}
 114
 115 /* These functions end with _real because they need to be
 116    distinguished from the debugging functions, and from the macros.
 117    Explanation follows:
 118
 119    If memory debugging is not turned on, wget.h defines these:
 120
 121      #define xmalloc xmalloc_real
 122      #define xrealloc xrealloc_real
 123      #define xstrdup xstrdup_real
 124      #define xfree free
 125
 126    In case of memory debugging, the definitions are a bit more
 127    complex, because we want to provide more information, *and* we want
 128    to call the debugging code.  (The former is the reason why xmalloc
 129    and friends need to be macros in the first place.)  Then it looks
 130    like this:
 131
 132      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 133      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 134      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 135      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 136
   Each of the *_debug functions does its magic and calls the real one.  */
 138
 139 #ifdef DEBUG_MALLOC
 140 # define STATIC_IF_DEBUG static
 141 #else
 142 # define STATIC_IF_DEBUG
 143 #endif
 144
 145 STATIC_IF_DEBUG void *
 146 xmalloc_real (size_t size)
 147 {
 148   void *ptr = malloc (size);
 149   if (!ptr)
 150     memfatal ("malloc");
 151   return ptr;
 152 }
 153
 154 STATIC_IF_DEBUG void *
 155 xrealloc_real (void *ptr, size_t newsize)
 156 {
 157   void *newptr;
 158
 159   /* Not all Un*xes have the feature of realloc() that calling it with
 160      a NULL-pointer is the same as malloc(), but it is easy to
 161      simulate.  */
 162   if (ptr)
 163     newptr = realloc (ptr, newsize);
 164   else
 165     newptr = malloc (newsize);
 166   if (!newptr)
 167     memfatal ("realloc");
 168   return newptr;
 169 }
 170
 171 STATIC_IF_DEBUG char *
 172 xstrdup_real (const char *s)
 173 {
 174   char *copy;
 175
 176 #ifndef HAVE_STRDUP
 177   int l = strlen (s);
 178   copy = malloc (l + 1);
 179   if (!copy)
 180     memfatal ("strdup");
 181   memcpy (copy, s, l + 1);
 182 #else  /* HAVE_STRDUP */
 183   copy = strdup (s);
 184   if (!copy)
 185     memfatal ("strdup");
 186 #endif /* HAVE_STRDUP */
 187
 188   return copy;
 189 }
 190
 191 #ifdef DEBUG_MALLOC
 192
 193 /* Crude home-grown routines for debugging some malloc-related
 194    problems.  Featured:
 195
 196    * Counting the number of malloc and free invocations, and reporting
 197      the "balance", i.e. how many times more malloc was called than it
 198      was the case with free.
 199
 200    * Making malloc store its entry into a simple array and free remove
 201      stuff from that array.  At the end, print the pointers which have
 202      not been freed, along with the source file and the line number.
 203      This also has the side-effect of detecting freeing memory that
 204      was never allocated.
 205
 206    Note that this kind of memory leak checking strongly depends on
 207    every malloc() being followed by a free(), even if the program is
 208    about to finish.  Wget is careful to free the data structure it
 209    allocated in init.c.  */
 210
/* Number of allocations and of frees recorded by the *_debug
   wrappers; print_malloc_debug_stats() reports both and their
   difference.  */
static int malloc_count, free_count;

/* Fixed-size table of the allocations currently being tracked.  A
   slot whose `ptr' is NULL is unused.  */
static struct {
  char *ptr;                    /* tracked pointer, NULL if slot is free */
  const char *file;             /* source file given when registering */
  int line;                     /* source line given when registering */
} malloc_debug[100000];
 218
 219 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 220    which can be a real problem.  It would be nice to use a hash table
 221    for malloc_debug, but the functions in hash.c are not suitable
 222    because they can call malloc() themselves.  Maybe it would work if
 223    the hash table were preallocated to a huge size, and if we set the
 224    rehash threshold to 1.0.  */
 225
 226 /* Register PTR in malloc_debug.  Abort if this is not possible
 227    (presumably due to the number of current allocations exceeding the
 228    size of malloc_debug.)  */
 229
 230 static void
 231 register_ptr (void *ptr, const char *file, int line)
 232 {
 233   int i;
 234   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 235     if (malloc_debug[i].ptr == NULL)
 236       {
 237         malloc_debug[i].ptr = ptr;
 238         malloc_debug[i].file = file;
 239         malloc_debug[i].line = line;
 240         return;
 241       }
 242   abort ();
 243 }
 244
 245 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 246    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 247
 248 static void
 249 unregister_ptr (void *ptr)
 250 {
 251   int i;
 252   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 253     if (malloc_debug[i].ptr == ptr)
 254       {
 255         malloc_debug[i].ptr = NULL;
 256         return;
 257       }
 258   abort ();
 259 }
 260
 261 /* Print the malloc debug stats that can be gathered from the above
 262    information.  Currently this is the count of mallocs, frees, the
 263    difference between the two, and the dump of the contents of
 264    malloc_debug.  The last part are the memory leaks.  */
 265
 266 void
 267 print_malloc_debug_stats (void)
 268 {
 269   int i;
 270   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 271           malloc_count, free_count, malloc_count - free_count);
 272   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 273     if (malloc_debug[i].ptr != NULL)
 274       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 275               malloc_debug[i].file, malloc_debug[i].line);
 276 }
 277
 278 void *
 279 xmalloc_debug (size_t size, const char *source_file, int source_line)
 280 {
 281   void *ptr = xmalloc_real (size);
 282   ++malloc_count;
 283   register_ptr (ptr, source_file, source_line);
 284   return ptr;
 285 }
 286
 287 void
 288 xfree_debug (void *ptr, const char *source_file, int source_line)
 289 {
 290   assert (ptr != NULL);
 291   ++free_count;
 292   unregister_ptr (ptr);
 293   free (ptr);
 294 }
 295
 296 void *
 297 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 298 {
 299   void *newptr = xrealloc_real (ptr, newsize);
 300   if (!ptr)
 301     {
 302       ++malloc_count;
 303       register_ptr (newptr, source_file, source_line);
 304     }
 305   else if (newptr != ptr)
 306     {
 307       unregister_ptr (ptr);
 308       register_ptr (newptr, source_file, source_line);
 309     }
 310   return newptr;
 311 }
 312
 313 char *
 314 xstrdup_debug (const char *s, const char *source_file, int source_line)
 315 {
 316   char *copy = xstrdup_real (s);
 317   ++malloc_count;
 318   register_ptr (copy, source_file, source_line);
 319   return copy;
 320 }
 321
 322 #endif /* DEBUG_MALLOC */
 323 \f
 324 /* Utility function: like xstrdup(), but also lowercases S.  */
 325
 326 char *
 327 xstrdup_lower (const char *s)
 328 {
 329   char *copy = xstrdup (s);
 330   char *p = copy;
 331   for (; *p; p++)
 332     *p = TOLOWER (*p);
 333   return copy;
 334 }
 335
 336 /* Return a count of how many times CHR occurs in STRING. */
 337
 338 int
 339 count_char (const char *string, char chr)
 340 {
 341   const char *p;
 342   int count = 0;
 343   for (p = string; *p; p++)
 344     if (*p == chr)
 345       ++count;
 346   return count;
 347 }
 348
 349 /* Copy the string formed by two pointers (one on the beginning, other
 350    on the char after the last char) to a new, malloc-ed location.
 351    0-terminate it.  */
 352 char *
 353 strdupdelim (const char *beg, const char *end)
 354 {
 355   char *res = (char *)xmalloc (end - beg + 1);
 356   memcpy (res, beg, end - beg);
 357   res[end - beg] = '\0';
 358   return res;
 359 }
 360
 361 /* Parse a string containing comma-separated elements, and return a
 362    vector of char pointers with the elements.  Spaces following the
 363    commas are ignored.  */
 364 char **
 365 sepstring (const char *s)
 366 {
 367   char **res;
 368   const char *p;
 369   int i = 0;
 370
 371   if (!s || !*s)
 372     return NULL;
 373   res = NULL;
 374   p = s;
 375   while (*s)
 376     {
 377       if (*s == ',')
 378         {
 379           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 380           res[i] = strdupdelim (p, s);
 381           res[++i] = NULL;
 382           ++s;
 383           /* Skip the blanks following the ','.  */
 384           while (ISSPACE (*s))
 385             ++s;
 386           p = s;
 387         }
 388       else
 389         ++s;
 390     }
 391   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 392   res[i] = strdupdelim (p, s);
 393   res[i + 1] = NULL;
 394   return res;
 395 }
 396 \f
 397 /* Return pointer to a static char[] buffer in which zero-terminated
 398    string-representation of TM (in form hh:mm:ss) is printed.
 399
 400    If TM is non-NULL, the current time-in-seconds will be stored
 401    there.
 402
 403    (#### This is misleading: one would expect TM would be used instead
 404    of the current time in that case.  This design was probably
 405    influenced by the design time(2), and should be changed at some
 406    points.  No callers use non-NULL TM anyway.)  */
 407
 408 char *
 409 time_str (time_t *tm)
 410 {
 411   static char output[15];
 412   struct tm *ptm;
 413   time_t secs = time (tm);
 414
 415   if (secs == -1)
 416     {
 417       /* In case of error, return the empty string.  Maybe we should
 418          just abort if this happens?  */
 419       *output = '\0';
 420       return output;
 421     }
 422   ptm = localtime (&secs);
 423   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 424   return output;
 425 }
 426
 427 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 428
 429 char *
 430 datetime_str (time_t *tm)
 431 {
 432   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 433   struct tm *ptm;
 434   time_t secs = time (tm);
 435
 436   if (secs == -1)
 437     {
 438       /* In case of error, return the empty string.  Maybe we should
 439          just abort if this happens?  */
 440       *output = '\0';
 441       return output;
 442     }
 443   ptm = localtime (&secs);
 444   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 445            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 446            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 447   return output;
 448 }
 449 \f
 450 /* The Windows versions of the following two functions are defined in
 451    mswindows.c.  */
 452
 453 #ifndef WINDOWS
 454 void
 455 fork_to_background (void)
 456 {
 457   pid_t pid;
 458   /* Whether we arrange our own version of opt.lfilename here.  */
 459   int changedp = 0;
 460
 461   if (!opt.lfilename)
 462     {
 463       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 464       changedp = 1;
 465     }
 466   pid = fork ();
 467   if (pid < 0)
 468     {
 469       /* parent, error */
 470       perror ("fork");
 471       exit (1);
 472     }
 473   else if (pid != 0)
 474     {
 475       /* parent, no error */
 476       printf (_("Continuing in background, pid %d.\n"), (int)pid);
 477       if (changedp)
 478         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 479       exit (0);                 /* #### should we use _exit()? */
 480     }
 481
 482   /* child: give up the privileges and keep running. */
 483   setsid ();
 484   freopen ("/dev/null", "r", stdin);
 485   freopen ("/dev/null", "w", stdout);
 486   freopen ("/dev/null", "w", stderr);
 487 }
 488 #endif /* not WINDOWS */
 489 \f
 490 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 491    specified with TM.  */
 492 void
 493 touch (const char *file, time_t tm)
 494 {
 495 #ifdef HAVE_STRUCT_UTIMBUF
 496   struct utimbuf times;
 497   times.actime = times.modtime = tm;
 498 #else
 499   time_t times[2];
 500   times[0] = times[1] = tm;
 501 #endif
 502
 503   if (utime (file, &times) == -1)
 504     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 505 }
 506
 507 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 508    nothing under MS-Windows.  */
 509 int
 510 remove_link (const char *file)
 511 {
 512   int err = 0;
 513   struct stat st;
 514
 515   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 516     {
 517       DEBUGP (("Unlinking %s (symlink).\n", file));
 518       err = unlink (file);
 519       if (err != 0)
 520         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 521                    file, strerror (errno));
 522     }
 523   return err;
 524 }
 525
 526 /* Does FILENAME exist?  This is quite a lousy implementation, since
 527    it supplies no error codes -- only a yes-or-no answer.  Thus it
 528    will return that a file does not exist if, e.g., the directory is
 529    unreadable.  I don't mind it too much currently, though.  The
 530    proper way should, of course, be to have a third, error state,
 531    other than true/false, but that would introduce uncalled-for
 532    additional complexity to the callers.  */
 533 int
 534 file_exists_p (const char *filename)
 535 {
 536 #ifdef HAVE_ACCESS
 537   return access (filename, F_OK) >= 0;
 538 #else
 539   struct stat buf;
 540   return stat (filename, &buf) >= 0;
 541 #endif
 542 }
 543
 544 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 545    Returns 0 on error.  */
 546 int
 547 file_non_directory_p (const char *path)
 548 {
 549   struct stat buf;
 550   /* Use lstat() rather than stat() so that symbolic links pointing to
 551      directories can be identified correctly.  */
 552   if (lstat (path, &buf) != 0)
 553     return 0;
 554   return S_ISDIR (buf.st_mode) ? 0 : 1;
 555 }
 556
 557 /* Return a unique filename, given a prefix and count */
 558 static char *
 559 unique_name_1 (const char *fileprefix, int count)
 560 {
 561   char *filename;
 562
 563   if (count)
 564     {
 565       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 566       sprintf (filename, "%s.%d", fileprefix, count);
 567     }
 568   else
 569     filename = xstrdup (fileprefix);
 570
 571   if (!file_exists_p (filename))
 572     return filename;
 573   else
 574     {
 575       xfree (filename);
 576       return NULL;
 577     }
 578 }
 579
 580 /* Return a unique file name, based on PREFIX.  */
 581 char *
 582 unique_name (const char *prefix)
 583 {
 584   char *file = NULL;
 585   int count = 0;
 586
 587   while (!file)
 588     file = unique_name_1 (prefix, count++);
 589   return file;
 590 }
 591 \f
 592 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 593    are missing, create them first.  In case any mkdir() call fails,
 594    return its error status.  Returns 0 on successful completion.
 595
 596    The behaviour of this function should be identical to the behaviour
 597    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 598 int
 599 make_directory (const char *directory)
 600 {
 601   int quit = 0;
 602   int i;
 603   int ret = 0;
 604   char *dir;
 605
 606   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 607      function is unsafe if called with a read-only char *argument.  */
 608   STRDUP_ALLOCA (dir, directory);
 609
 610   /* If the first character of dir is '/', skip it (and thus enable
 611      creation of absolute-pathname directories.  */
 612   for (i = (*dir == '/'); 1; ++i)
 613     {
 614       for (; dir[i] && dir[i] != '/'; i++)
 615         ;
 616       if (!dir[i])
 617         quit = 1;
 618       dir[i] = '\0';
 619       /* Check whether the directory already exists.  Allow creation of
 620          of intermediate directories to fail, as the initial path components
 621          are not necessarily directories!  */
 622       if (!file_exists_p (dir))
 623         ret = mkdir (dir, 0777);
 624       else
 625         ret = 0;
 626       if (quit)
 627         break;
 628       else
 629         dir[i] = '/';
 630     }
 631   return ret;
 632 }
 633
 634 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 635    should be a file name.
 636
 637    file_merge("/foo/bar", "baz")  => "/foo/baz"
 638    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 639    file_merge("foo", "bar")       => "bar"
 640
 641    In other words, it's a simpler and gentler version of uri_merge_1.  */
 642
 643 char *
 644 file_merge (const char *base, const char *file)
 645 {
 646   char *result;
 647   const char *cut = (const char *)strrchr (base, '/');
 648
 649   if (!cut)
 650     return xstrdup (file);
 651
 652   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 653   memcpy (result, base, cut - base);
 654   result[cut - base] = '/';
 655   strcpy (result + (cut - base) + 1, file);
 656
 657   return result;
 658 }
 659 \f
 660 static int in_acclist PARAMS ((const char *const *, const char *, int));
 661
 662 /* Determine whether a file is acceptable to be followed, according to
 663    lists of patterns to accept/reject.  */
 664 int
 665 acceptable (const char *s)
 666 {
 667   int l = strlen (s);
 668
 669   while (l && s[l] != '/')
 670     --l;
 671   if (s[l] == '/')
 672     s += (l + 1);
 673   if (opt.accepts)
 674     {
 675       if (opt.rejects)
 676         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 677                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 678       else
 679         return in_acclist ((const char *const *)opt.accepts, s, 1);
 680     }
 681   else if (opt.rejects)
 682     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 683   return 1;
 684 }
 685
 686 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 687    `/something', frontcmp() will return 1 only if S2 begins with
 688    `/something'.  Otherwise, 0 is returned.  */
 689 int
 690 frontcmp (const char *s1, const char *s2)
 691 {
 692   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 693   return !*s1;
 694 }
 695
 696 /* Iterate through STRLIST, and return the first element that matches
 697    S, through wildcards or front comparison (as appropriate).  */
 698 static char *
 699 proclist (char **strlist, const char *s, enum accd flags)
 700 {
 701   char **x;
 702
 703   for (x = strlist; *x; x++)
 704     if (has_wildcards_p (*x))
 705       {
 706         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 707           break;
 708       }
 709     else
 710       {
 711         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 712         if (frontcmp (p, s))
 713           break;
 714       }
 715   return *x;
 716 }
 717
 718 /* Returns whether DIRECTORY is acceptable for download, wrt the
 719    include/exclude lists.
 720
 721    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 722    and absolute paths may be freely intermixed.  */
 723 int
 724 accdir (const char *directory, enum accd flags)
 725 {
 726   /* Remove starting '/'.  */
 727   if (flags & ALLABS && *directory == '/')
 728     ++directory;
 729   if (opt.includes)
 730     {
 731       if (!proclist (opt.includes, directory, flags))
 732         return 0;
 733     }
 734   if (opt.excludes)
 735     {
 736       if (proclist (opt.excludes, directory, flags))
 737         return 0;
 738     }
 739   return 1;
 740 }
 741
 742 /* Match the end of STRING against PATTERN.  For instance:
 743
 744    match_backwards ("abc", "bc") -> 1
 745    match_backwards ("abc", "ab") -> 0
 746    match_backwards ("abc", "abc") -> 1 */
 747 int
 748 match_tail (const char *string, const char *pattern)
 749 {
 750   int i, j;
 751
 752   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 753     if (string[i] != pattern[j])
 754       break;
 755   /* If the pattern was exhausted, the match was succesful.  */
 756   if (j == -1)
 757     return 1;
 758   else
 759     return 0;
 760 }
 761
 762 /* Checks whether string S matches each element of ACCEPTS.  A list
 763    element are matched either with fnmatch() or match_tail(),
 764    according to whether the element contains wildcards or not.
 765
 766    If the BACKWARD is 0, don't do backward comparison -- just compare
 767    them normally.  */
 768 static int
 769 in_acclist (const char *const *accepts, const char *s, int backward)
 770 {
 771   for (; *accepts; accepts++)
 772     {
 773       if (has_wildcards_p (*accepts))
 774         {
 775           /* fnmatch returns 0 if the pattern *does* match the
 776              string.  */
 777           if (fnmatch (*accepts, s, 0) == 0)
 778             return 1;
 779         }
 780       else
 781         {
 782           if (backward)
 783             {
 784               if (match_tail (s, *accepts))
 785                 return 1;
 786             }
 787           else
 788             {
 789               if (!strcmp (s, *accepts))
 790                 return 1;
 791             }
 792         }
 793     }
 794   return 0;
 795 }
 796
 797 /* Return the location of STR's suffix (file extension).  Examples:
 798    suffix ("foo.bar")       -> "bar"
 799    suffix ("foo.bar.baz")   -> "baz"
 800    suffix ("/foo/bar")      -> NULL
 801    suffix ("/foo.bar/baz")  -> NULL  */
 802 char *
 803 suffix (const char *str)
 804 {
 805   int i;
 806
 807   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 808     ;
 809
 810   if (str[i++] == '.')
 811     return (char *)str + i;
 812   else
 813     return NULL;
 814 }
 815
 816 /* Return non-zero if FNAME ends with a typical HTML suffix.  The
 817    following (case-insensitive) suffixes are presumed to be HTML files:
 818
 819      html
 820      htm
 821      ?html (`?' matches one character)
 822
 823    #### CAVEAT.  This is not necessarily a good indication that FNAME
 824    refers to a file that contains HTML!  */
 825 int
 826 has_html_suffix_p (const char *fname)
 827 {
 828   char *suf;
 829
 830   if ((suf = suffix (fname)) == NULL)
 831     return 0;
 832   if (!strcasecmp (suf, "html"))
 833     return 1;
 834   if (!strcasecmp (suf, "htm"))
 835     return 1;
 836   if (suf[0] && !strcasecmp (suf + 1, "html"))
 837     return 1;
 838   return 0;
 839 }
 840
 841 /* Read a line from FP and return the pointer to freshly allocated
 842    storage.  The stoarage space is obtained through malloc() and
 843    should be freed with free() when it is no longer needed.
 844
 845    The length of the line is not limited, except by available memory.
 846    The newline character at the end of line is retained.  The line is
 847    terminated with a zero character.
 848
 849    After end-of-file is encountered without anything being read, NULL
 850    is returned.  NULL is also returned on error.  To distinguish
 851    between these two cases, use the stdio function ferror().  */
 852
 853 char *
 854 read_whole_line (FILE *fp)
 855 {
 856   int length = 0;
 857   int bufsize = 82;
 858   char *line = (char *)xmalloc (bufsize);
 859
 860   while (fgets (line + length, bufsize - length, fp))
 861     {
 862       length += strlen (line + length);
 863       if (length == 0)
 864         /* Possible for example when reading from a binary file where
 865            a line begins with \0.  */
 866         continue;
 867
 868       if (line[length - 1] == '\n')
 869         break;
 870
 871       /* fgets() guarantees to read the whole line, or to use up the
 872          space we've given it.  We can double the buffer
 873          unconditionally.  */
 874       bufsize <<= 1;
 875       line = xrealloc (line, bufsize);
 876     }
 877   if (length == 0 || ferror (fp))
 878     {
 879       xfree (line);
 880       return NULL;
 881     }
 882   if (length + 1 < bufsize)
 883     /* Relieve the memory from our exponential greediness.  We say
 884        `length + 1' because the terminating \0 is not included in
 885        LENGTH.  We don't need to zero-terminate the string ourselves,
 886        though, because fgets() does that.  */
 887     line = xrealloc (line, length + 1);
 888   return line;
 889 }
 890 \f
 891 /* Read FILE into memory.  A pointer to `struct file_memory' are
 892    returned; use struct element `content' to access file contents, and
 893    the element `length' to know the file length.  `content' is *not*
 894    zero-terminated, and you should *not* read or write beyond the [0,
 895    length) range of characters.
 896
 897    After you are done with the file contents, call read_file_free to
 898    release the memory.
 899
 900    Depending on the operating system and the type of file that is
 901    being read, read_file() either mmap's the file into memory, or
 902    reads the file into the core using read().
 903
 904    If file is named "-", fileno(stdin) is used for reading instead.
 905    If you want to read from a real file named "-", use "./-" instead.  */
 906
 907 struct file_memory *
 908 read_file (const char *file)
 909 {
 910   int fd;
 911   struct file_memory *fm;
 912   long size;
 913   int inhibit_close = 0;
 914
 915   /* Some magic in the finest tradition of Perl and its kin: if FILE
 916      is "-", just use stdin.  */
 917   if (HYPHENP (file))
 918     {
 919       fd = fileno (stdin);
 920       inhibit_close = 1;
 921       /* Note that we don't inhibit mmap() in this case.  If stdin is
 922          redirected from a regular file, mmap() will still work.  */
 923     }
 924   else
 925     fd = open (file, O_RDONLY);
 926   if (fd < 0)
 927     return NULL;
 928   fm = xmalloc (sizeof (struct file_memory));
 929
 930 #ifdef HAVE_MMAP
 931   {
 932     struct stat buf;
 933     if (fstat (fd, &buf) < 0)
 934       goto mmap_lose;
 935     fm->length = buf.st_size;
 936     /* NOTE: As far as I know, the callers of this function never
 937        modify the file text.  Relying on this would enable us to
 938        specify PROT_READ and MAP_SHARED for a marginal gain in
 939        efficiency, but at some cost to generality.  */
 940     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 941                         MAP_PRIVATE, fd, 0);
 942     if (fm->content == (char *)MAP_FAILED)
 943       goto mmap_lose;
 944     if (!inhibit_close)
 945       close (fd);
 946
 947     fm->mmap_p = 1;
 948     return fm;
 949   }
 950
 951  mmap_lose:
 952   /* The most common reason why mmap() fails is that FD does not point
 953      to a plain file.  However, it's also possible that mmap() doesn't
 954      work for a particular type of file.  Therefore, whenever mmap()
 955      fails, we just fall back to the regular method.  */
 956 #endif /* HAVE_MMAP */
 957
 958   fm->length = 0;
 959   size = 512;                   /* number of bytes fm->contents can
 960                                    hold at any given time. */
 961   fm->content = xmalloc (size);
 962   while (1)
 963     {
 964       long nread;
 965       if (fm->length > size / 2)
 966         {
 967           /* #### I'm not sure whether the whole exponential-growth
 968              thing makes sense with kernel read.  On Linux at least,
 969              read() refuses to read more than 4K from a file at a
 970              single chunk anyway.  But other Unixes might optimize it
 971              better, and it doesn't *hurt* anything, so I'm leaving
 972              it.  */
 973
 974           /* Normally, we grow SIZE exponentially to make the number
 975              of calls to read() and realloc() logarithmic in relation
 976              to file size.  However, read() can read an amount of data
 977              smaller than requested, and it would be unreasonably to
 978              double SIZE every time *something* was read.  Therefore,
 979              we double SIZE only when the length exceeds half of the
 980              entire allocated size.  */
 981           size <<= 1;
 982           fm->content = xrealloc (fm->content, size);
 983         }
 984       nread = read (fd, fm->content + fm->length, size - fm->length);
 985       if (nread > 0)
 986         /* Successful read. */
 987         fm->length += nread;
 988       else if (nread < 0)
 989         /* Error. */
 990         goto lose;
 991       else
 992         /* EOF */
 993         break;
 994     }
 995   if (!inhibit_close)
 996     close (fd);
 997   if (size > fm->length && fm->length != 0)
 998     /* Due to exponential growth of fm->content, the allocated region
 999        might be much larger than what is actually needed.  */
1000     fm->content = xrealloc (fm->content, fm->length);
1001   fm->mmap_p = 0;
1002   return fm;
1003
1004  lose:
1005   if (!inhibit_close)
1006     close (fd);
1007   xfree (fm->content);
1008   xfree (fm);
1009   return NULL;
1010 }
1011
1012 /* Release the resources held by FM.  Specifically, this calls
1013    munmap() or xfree() on fm->content, depending whether mmap or
1014    malloc/read were used to read in the file.  It also frees the
1015    memory needed to hold the FM structure itself.  */
1016
1017 void
1018 read_file_free (struct file_memory *fm)
1019 {
1020 #ifdef HAVE_MMAP
1021   if (fm->mmap_p)
1022     {
1023       munmap (fm->content, fm->length);
1024     }
1025   else
1026 #endif
1027     {
1028       xfree (fm->content);
1029     }
1030   xfree (fm);
1031 }
1032 \f
/* Release a NULL-terminated vector of pointers: every element is
   passed to xfree(), and so is the vector itself.  A NULL VEC is
   accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;

  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1046
/* Append vector V2 to vector V1.  Both are NULL-terminated vectors
   of pointers.  The function frees the V2 vector (but not the
   elements it points to, which are moved into the result) and
   reallocates V1, so neither pointer may be used after the call;
   use the returned vector instead.

   If V1 is NULL, V2 is returned unchanged.  If V2 is NULL, V1 is
   returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 has no elements; there is nothing to copy, so just
         release it and avoid a pointless realloc (j == 0).  */
      xfree (v2);
      return v1;
    }
  /* Count the elements of v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count the elements of v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Grow v1 to hold both sets of elements plus the terminating NULL.
     The elements are `char *', so that is the size to multiply by.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy v2's elements, including its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1075
1076 /* A set of simple-minded routines to store strings in a linked list.
1077    This used to also be used for searching, but now we have hash
1078    tables for that.  */
1079
1080 /* It's a shame that these simple things like linked lists and hash
1081    tables (see hash.c) need to be implemented over and over again.  It
1082    would be nice to be able to use the routines from glib -- see
1083    www.gtk.org for details.  However, that would make Wget depend on
1084    glib, and I want to avoid dependencies to external libraries for
1085    reasons of convenience and portability (I suspect Wget is more
1086    portable than anything ever written for Gnome).  */
1087
1088 /* Append an element to the list.  If the list has a huge number of
1089    elements, this can get slow because it has to find the list's
1090    ending.  If you think you have to call slist_append in a loop,
1091    think about calling slist_prepend() followed by slist_nreverse().  */
1092
1093 slist *
1094 slist_append (slist *l, const char *s)
1095 {
1096   slist *newel = (slist *)xmalloc (sizeof (slist));
1097   slist *beg = l;
1098
1099   newel->string = xstrdup (s);
1100   newel->next = NULL;
1101
1102   if (!l)
1103     return newel;
1104   /* Find the last element.  */
1105   while (l->next)
1106     l = l->next;
1107   l->next = newel;
1108   return beg;
1109 }
1110
1111 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1112
1113 slist *
1114 slist_prepend (slist *l, const char *s)
1115 {
1116   slist *newel = (slist *)xmalloc (sizeof (slist));
1117   newel->string = xstrdup (s);
1118   newel->next = l;
1119   return newel;
1120 }
1121
1122 /* Destructively reverse L. */
1123
1124 slist *
1125 slist_nreverse (slist *l)
1126 {
1127   slist *prev = NULL;
1128   while (l)
1129     {
1130       slist *next = l->next;
1131       l->next = prev;
1132       prev = l;
1133       l = next;
1134     }
1135   return prev;
1136 }
1137
1138 /* Is there a specific entry in the list?  */
1139 int
1140 slist_contains (slist *l, const char *s)
1141 {
1142   for (; l; l = l->next)
1143     if (!strcmp (l->string, s))
1144       return 1;
1145   return 0;
1146 }
1147
1148 /* Free the whole slist.  */
1149 void
1150 slist_free (slist *l)
1151 {
1152   while (l)
1153     {
1154       slist *n = l->next;
1155       xfree (l->string);
1156       xfree (l);
1157       l = n;
1158     }
1159 }
1160 \f
1161 /* Sometimes it's useful to create "sets" of strings, i.e. special
1162    hash tables where you want to store strings as keys and merely
1163    query for their existence.  Here is a set of utility routines that
1164    makes that transparent.  */
1165
/* Add string S to the string set HT.  The set keeps its own copy of
   S, made with xstrdup().  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not a member yet.  That way there is no
     old key to free() and no needless strdup() of a new one.

     The value stored is the literal "1": a clear, arbitrary marker
     that costs no memory, because every key-value pair of every
     `set' hash table shares the pointer to that one string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1181
/* Test whether S is a member of the string set HT.  This simply
   forwards to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}
1189
/* Callback for hash_table_map used by string_set_free: release the
   key of one entry.  The value and the extra argument are not used.
   Always returns 0 (presumably "keep iterating" -- see
   hash_table_map in hash.c).  */

static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}

/* Destroy the string set HT: first free every key (the copies made
   by string_set_add), then destroy the hash table itself.  */

void
string_set_free (struct hash_table *ht)
{
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1203
/* Callback for hash_table_map used by free_keys_and_values: release
   both the key and the value of one entry.  The extra argument is
   not used.  Always returns 0.  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}

/* Call xfree() on every key and every value stored in HT.  The hash
   table itself is not destroyed here.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1219
1220 \f
/* Engine for legible and legible_very_long.  REPR is the textual
   form of a number, optionally starting with `-'.  The result is the
   same text with a `,' inserted between each group of three
   characters, counted from the right (e.g. "1234567" becomes
   "1,234,567").

   The result lives in a static buffer of 128 bytes which is
   overwritten by the next call; no bounds checking is done on
   REPR.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  const char *src = repr;
  char *dst = outbuf;
  int remaining;

  /* Carry a leading minus sign over unchanged.  */
  if (*src == '-')
    *dst++ = *src++;

  /* Copy the characters one by one.  Whenever the count of characters
     still to be copied is a non-zero multiple of three, a separator
     goes in front of them.  */
  remaining = strlen (src);
  while (remaining > 0)
    {
      *dst++ = *src++;
      --remaining;
      if (remaining > 0 && remaining % 3 == 0)
        *dst++ = ',';
    }

  /* Zero-terminate the string.  */
  *dst = '\0';
  return outbuf;
}
1258
/* Return L printed in decimal with `,' between groups of three
   digits.  The returned pointer refers to the static buffer of
   legible_1, so the text is overwritten by the next call to legible
   or legible_very_long.  */
char *
legible (long l)
{
  /* 24 bytes is enough for a 64-bit long, sign and terminator (see
     number_to_string).  */
  char digits[24];

  number_to_string (digits, l);
  return legible_1 (digits);
}
1268
1269 /* Write a string representation of NUMBER into the provided buffer.
1270    We cannot use sprintf() because we cannot be sure whether the
1271    platform supports printing of what we chose for VERY_LONG_TYPE.
1272
1273    Example: Gcc supports `long long' under many platforms, but on many
1274    of those the native libc knows nothing of it and therefore cannot
1275    print it.
1276
1277    How long BUFFER needs to be depends on the platform and the content
1278    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1279    bytes are sufficient.  Using more might be a good idea.
1280
1281    This function does not go through the hoops that long_to_string
1282    goes to because it doesn't aspire to be fast.  (It's called perhaps
1283    once in a Wget run.)  */
1284
1285 static void
1286 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1287 {
1288   int i = 0;
1289   int j;
1290
1291   /* Print the number backwards... */
1292   do
1293     {
1294       buffer[i++] = '0' + number % 10;
1295       number /= 10;
1296     }
1297   while (number);
1298
1299   /* ...and reverse the order of the digits. */
1300   for (j = 0; j < i / 2; j++)
1301     {
1302       char c = buffer[j];
1303       buffer[j] = buffer[i - 1 - j];
1304       buffer[i - 1 - j] = c;
1305     }
1306   buffer[i] = '\0';
1307 }
1308
1309 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1310 char *
1311 legible_very_long (VERY_LONG_TYPE l)
1312 {
1313   char inbuf[128];
1314   /* Print the number into the buffer.  */
1315   very_long_to_string (inbuf, l);
1316   return legible_1 (inbuf);
1317 }
1318
/* Count the characters needed to print NUMBER in decimal: one per
   digit, plus one for the `-' sign if NUMBER is negative.  Zero
   counts as one digit.  */
int
numdigit (long number)
{
  int cnt = 1;
  unsigned long magnitude;

  if (number < 0)
    {
      /* Negate in unsigned arithmetic: `-number' would overflow (and
         be undefined) for the most negative long.  */
      magnitude = 0UL - (unsigned long) number;
      ++cnt;                    /* room for the `-' sign */
    }
  else
    magnitude = (unsigned long) number;

  while ((magnitude /= 10) > 0)
    ++cnt;
  return cnt;
}
1333
/* Helper macros for number_to_string.  They work on two variables of
   the enclosing function: P, the output cursor, and N, the part of
   the number that remains to be printed.

   ONE_DIGIT (FIGURE) stores the digit N / FIGURE at *P and advances
   P.  ONE_DIGIT_ADVANCE additionally reduces N modulo FIGURE.
   DIGITS_<k> (FIGURE) prints a k-digit number whose leading digit
   has the weight FIGURE, i.e. FIGURE must be 10^(k-1).  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This should be completely
   equivalent to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be big enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit longs the maximum
   needed size is 24 bytes.  That includes the digits needed for the
   largest 64-bit number, the `-' sign in case it's negative, and the
   terminating '\0'.

   The digits are computed on the unsigned magnitude of NUMBER, so
   the most negative long is printed correctly as well.  */

char *
number_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
  return buffer + strlen (buffer);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Negate in unsigned arithmetic; `-number' would overflow for
         the most negative long.  */
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Select the expansion that matches the number of digits in N.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
  return p;
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1450 \f
/* Support for timers. */

/* Start from a clean slate: make sure none of the back-end selection
   macros is defined before one of them is chosen below.  */
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Depending on the OS and availability of gettimeofday(), one and
   only one of the above constants will be defined.  Virtually all
   modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
   use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
   non-Windows systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#ifdef WINDOWS
# define TIMER_WINDOWS
#else  /* not WINDOWS */
# ifdef HAVE_GETTIMEOFDAY
#  define TIMER_GETTIMEOFDAY
# else
#  define TIMER_TIME
# endif
#endif /* not WINDOWS */

/* A timer object.  It records the moment of the last wtimer_reset()
   in the representation of the selected back end; wtimer_elapsed()
   measures from that moment.  */
struct wget_timer {
#ifdef TIMER_GETTIMEOFDAY
  /* tv_sec and tv_usec as reported by gettimeofday() at reset.  */
  long secs;
  long usecs;
#endif

#ifdef TIMER_TIME
  /* Value of time(NULL) at reset; one-second resolution.  */
  time_t secs;
#endif

#ifdef TIMER_WINDOWS
  /* System time at reset, converted to a FILETIME and stored as one
     64-bit quantity.  */
  ULARGE_INTEGER wintime;
#endif
};
1491
1492 /* Allocate a timer.  It is not legal to do anything with a freshly
1493    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1494
1495 struct wget_timer *
1496 wtimer_allocate (void)
1497 {
1498   struct wget_timer *wt =
1499     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1500   return wt;
1501 }
1502
/* Convenience constructor: allocate a timer with wtimer_allocate(),
   start it with wtimer_reset(), and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1512
/* Release the memory held by timer WT.  WT must not be used in any
   way after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1521
/* (Re)start timer WT: record the current time as the point from
   which wtimer_elapsed() counts milliseconds.  Resetting a timer that
   has been used before is allowed.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined TIMER_GETTIMEOFDAY
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined TIMER_TIME
  wt->secs = time (NULL);
#elif defined TIMER_WINDOWS
  FILETIME filetime;
  SYSTEMTIME systime;

  /* Fetch the system time and store it as a FILETIME, split into
     the two halves of the 64-bit WINTIME.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1549
/* Return the number of milliseconds elapsed since the timer was last
   reset.  It is allowed to call this function more than once to get
   increasingly higher elapsed values.

   The measurement uses the same back end (TIMER_GETTIMEOFDAY,
   TIMER_TIME or TIMER_WINDOWS) that wtimer_reset() used to record
   the starting point.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds scaled up to milliseconds, plus the microsecond
     difference scaled down to milliseconds.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() only has one-second resolution.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

  /* Keyed on TIMER_WINDOWS, like the rest of the timer code, rather
     than on WINDOWS directly.  */
#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME counts 100-nanosecond intervals; 10000 of them make one
     millisecond.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1579
/* Return an estimate, in milliseconds, of the smallest time
   difference the timer implementation can resolve.  Code that has to
   cope with "zero" time intervals needs to know this.  */

long
wtimer_granularity (void)
{
#if defined TIMER_GETTIMEOFDAY
  /* Granularity of gettimeofday is hugely architecture-dependent.
     However, it appears that on modern machines it is better than
     1ms.  */
  return 1;
#elif defined TIMER_TIME
  /* time() counts whole seconds.  */
  return 1000;
#elif defined TIMER_WINDOWS
  /* ? */
  return 1;
#endif
}
1604 \f
1605 /* This should probably be at a better place, but it doesn't really
1606    fit into html-parse.c.  */
1607
/* Return a freshly malloc-ed copy of string S in which the following
   characters are replaced with HTML entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  The caller owns the
   returned string.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *out, *res;
  int size = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':
        size += 5;              /* `&amp;' */
        break;
      case '<':
      case '>':
        size += 4;              /* `&lt;' and `&gt;' */
        break;
      case '\"':
        size += 6;              /* `&quot;' */
        break;
      case ' ':
        size += 5;              /* `&#32;' */
        break;
      default:
        size += 1;
      }
  res = (char *)xmalloc (size + 1);

  /* Second pass: copy S, substituting an entity where one applies.  */
  out = res;
  for (src = s; *src; src++)
    {
      const char *entity;

      if (*src == '&')
        entity = "&amp;";
      else if (*src == '<')
        entity = "&lt;";
      else if (*src == '>')
        entity = "&gt;";
      else if (*src == '\"')
        entity = "&quot;";
      else if (*src == ' ')
        entity = "&#32;";
      else
        entity = NULL;

      if (entity != NULL)
        {
          int len = strlen (entity);
          memcpy (out, entity, len);
          out += len;
        }
      else
        *out++ = *src;
    }
  *out = '\0';
  return res;
}
1679
/* Return the width, in columns, of the terminal attached to stderr.
   Return 0 when the width cannot be determined: when the platform
   has no TIOCGWINSZ, when output goes to a log file
   (opt.lfilename is set), or when the ioctl fails.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* The ioctl is only attempted when we are not logging to a file;
     a failure most likely means ENOTTY.  */
  if (opt.lfilename != NULL
      || ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;

  return wsz.ws_col;
#else  /* not TIOCGWINSZ */
  return 0;
#endif /* not TIOCGWINSZ */
}
1704
/* Return a random number in the range [0, MAX-1].

   If MAX is greater than the value of RAND_MAX+1 on the system, the
   returned value will be in the range [0, RAND_MAX].  This may be
   fixed in a future release.

   The generator is seeded from the current time on the first call;
   no explicit initialization is needed.

   rand() is used for portability.  random() is said to offer better
   randomness, but Wget does not need that, so simplicity wins and
   rand is used unconditionally.  */

int
random_number (int max)
{
  static int initialized;
  int raw;
  double scaled;

  if (!initialized)
    {
      srand (time (NULL));
      initialized = 1;
    }
  raw = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  raw &= RAND_MAX;
#endif

  /* Scale RAW from [0, RAND_MAX] down to [0, MAX).  This has the same
     purpose as rand() % max, but the result is driven by the
     high-order bits, which are more random where rand() is a simple
     congruential generator.  */
  scaled = (double)max * raw / (RAND_MAX + 1.0);
  return (int)scaled;
}
1747
#if 0
/* A debugging function for checking whether an MD5 library works. */

/* This code is disabled by the surrounding `#if 0' and is not
   compiled.  */

#include "gen-md5.h"

/* Compute the MD5 digest of the zero-terminated string BUF and return
   it as text, two characters per digest byte (32 characters plus the
   terminating zero).  The result is stored in a static buffer that is
   overwritten by the next call.  */

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];        /* the binary digest */
  static char res[33];          /* 2 * 16 characters + '\0' */
  unsigned char *p1;
  char *p2;
  int cnt;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Convert each byte of the digest to two characters, high nibble
     first.  (XDIGIT_TO_xchar presumably yields lowercase hex digits
     -- TODO confirm.)  */
  p1 = raw;
  p2 = res;
  cnt = 16;
  while (cnt--)
    {
      *p2++ = XDIGIT_TO_xchar (*p1 >> 4);
      *p2++ = XDIGIT_TO_xchar (*p1 & 0xf);
      ++p1;
    }
  *p2 = '\0';

  return res;
}
#endif
1781 \f
/* Implementation of run_with_timeout, a generic timeout handler for
   systems with Unix-like signal handling.

   The jump buffer, the SETJMP wrapper and the SIGALRM handler are
   only needed -- and only guaranteed to compile -- when
   run_with_timeout actually uses signals, so they are defined only
   under USE_SIGNAL_TIMEOUT.  */
#ifdef USE_SIGNAL_TIMEOUT
# ifdef HAVE_SIGSETJMP
#  define SETJMP(env) sigsetjmp (env, 1)

static sigjmp_buf run_with_timeout_env;

/* SIGALRM handler: abandon the function being run by jumping back to
   the sigsetjmp point in run_with_timeout.  Since the signal mask was
   saved by sigsetjmp (env, 1), siglongjmp restores it.  */

static RETSIGTYPE
abort_run_with_timeout (int sig)
{
  assert (sig == SIGALRM);
  siglongjmp (run_with_timeout_env, -1);
}
# else  /* not HAVE_SIGSETJMP */
#  define SETJMP(env) setjmp (env)

static jmp_buf run_with_timeout_env;

/* SIGALRM handler: abandon the function being run by jumping back to
   the setjmp point in run_with_timeout.  */

static RETSIGTYPE
abort_run_with_timeout (int sig)
{
  /* Declared ahead of the first statement, as C89 requires.  */
  int mask;

  assert (sig == SIGALRM);
  /* We don't have siglongjmp to preserve the set of blocked signals;
     if we longjumped out of the handler at this point, SIGALRM would
     remain blocked.  We must unblock it manually. */
  mask = siggetmask ();
  mask &= ~sigmask(SIGALRM);
  sigsetmask (mask);

  /* Now it's safe to longjump. */
  longjmp (run_with_timeout_env, -1);
}
# endif /* not HAVE_SIGSETJMP */
#endif /* USE_SIGNAL_TIMEOUT */
1815
/* Call FUN (ARG), giving up if it takes longer than TIMEOUT seconds.

   Return 0 if FUN ran to completion and 1 if it was interrupted
   because the timeout expired.  A TIMEOUT of 0 means no time limit.
   When USE_SIGNAL_TIMEOUT is not defined, timeouts are not supported:
   FUN is simply called and 0 is returned.

   The timeout is implemented with alarm() and a SIGALRM handler that
   longjmps out of FUN; the SIGALRM disposition is set back to SIG_DFL
   before returning.  errno as left by FUN is preserved.  */

int
run_with_timeout (long timeout, void (*fun) (void *), void *arg)
{
#ifdef USE_SIGNAL_TIMEOUT
  int errno_copy;

  if (timeout != 0)
    {
      signal (SIGALRM, abort_run_with_timeout);
      if (SETJMP (run_with_timeout_env) != 0)
        {
          /* We got here by a longjmp from the SIGALRM handler: FUN
             did not finish in time.  */
          signal (SIGALRM, SIG_DFL);
          return 1;
        }
      alarm (timeout);
      fun (arg);

      /* FUN completed.  Cancel the pending alarm and restore the
         default handler, keeping errno intact in case alarm() or
         signal() modifies it.  */
      errno_copy = errno;
      alarm (0);
      signal (SIGALRM, SIG_DFL);
      errno = errno_copy;

      return 0;
    }
#endif /* USE_SIGNAL_TIMEOUT */

  /* No time limit requested, or timeouts are not available.  */
  fun (arg);
  return 0;
}
1850