sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 #include "wget.h"
  55 #include "utils.h"
  56 #include "fnmatch.h"
  57 #include "hash.h"
  58
  59 #ifndef errno
  60 extern int errno;
  61 #endif
  62
  63 /* This section implements several wrappers around the basic
  64    allocation routines.  This is done for two reasons: first, so that
  65    the callers of these functions need not consistently check for
  66    errors.  If there is not enough virtual memory for running Wget,
  67    something is seriously wrong, and Wget exits with an appropriate
  68    error message.
  69
  70    The second reason why these are useful is that, if DEBUG_MALLOC is
  71    defined, they also provide a handy (if crude) malloc debugging
  72    interface that checks memory leaks.  */
  73
  74 /* Croak the fatal memory error and bail out with non-zero exit
  75    status.  */
  76 static void
  77 memfatal (const char *what)
  78 {
  79   /* HACK: expose save_log_p from log.c, so we can turn it off in
  80      order to prevent saving the log.  Saving the log is dangerous
  81      because logprintf() and logputs() can call malloc(), so this
  82      could infloop.  When logging is turned off, infloop can no longer
  83      happen.
  84
  85      #### This is no longer really necessary because the new routines
  86      in log.c cons only if the line exceeds eighty characters.  But
  87      this can come at the end of a line, so it's OK to be careful.
  88
  89      On a more serious note, it would be good to have a
  90      log_forced_shutdown() routine that exposes this cleanly.  */
  91   extern int save_log_p;
  92
  93   save_log_p = 0;
  94   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  95   exit (1);
  96 }
  97
  98 /* These functions end with _real because they need to be
  99    distinguished from the debugging functions, and from the macros.
 100    Explanation follows:
 101
 102    If memory debugging is not turned on, wget.h defines these:
 103
 104      #define xmalloc xmalloc_real
 105      #define xrealloc xrealloc_real
 106      #define xstrdup xstrdup_real
 107      #define xfree free
 108
 109    In case of memory debugging, the definitions are a bit more
 110    complex, because we want to provide more information, *and* we want
 111    to call the debugging code.  (The former is the reason why xmalloc
 112    and friends need to be macros in the first place.)  Then it looks
 113    like this:
 114
 115      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 116      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 117      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 118      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 119
   Each of the *_debug functions does its magic and calls the real one.  */
 121
/* Linkage prefix for the *_real allocators defined below.  When
   DEBUG_MALLOC is defined they become `static'; otherwise the macro
   expands to nothing and they keep external linkage.  */
#ifdef DEBUG_MALLOC
# define STATIC_IF_DEBUG static
#else
# define STATIC_IF_DEBUG
#endif
 127
 128 STATIC_IF_DEBUG void *
 129 xmalloc_real (size_t size)
 130 {
 131   void *ptr = malloc (size);
 132   if (!ptr)
 133     memfatal ("malloc");
 134   return ptr;
 135 }
 136
 137 STATIC_IF_DEBUG void *
 138 xrealloc_real (void *ptr, size_t newsize)
 139 {
 140   void *newptr;
 141
 142   /* Not all Un*xes have the feature of realloc() that calling it with
 143      a NULL-pointer is the same as malloc(), but it is easy to
 144      simulate.  */
 145   if (ptr)
 146     newptr = realloc (ptr, newsize);
 147   else
 148     newptr = malloc (newsize);
 149   if (!newptr)
 150     memfatal ("realloc");
 151   return newptr;
 152 }
 153
 154 STATIC_IF_DEBUG char *
 155 xstrdup_real (const char *s)
 156 {
 157   char *copy;
 158
 159 #ifndef HAVE_STRDUP
 160   int l = strlen (s);
 161   copy = malloc (l + 1);
 162   if (!copy)
 163     memfatal ("strdup");
 164   memcpy (copy, s, l + 1);
 165 #else  /* HAVE_STRDUP */
 166   copy = strdup (s);
 167   if (!copy)
 168     memfatal ("strdup");
 169 #endif /* HAVE_STRDUP */
 170
 171   return copy;
 172 }
 173
 174 #ifdef DEBUG_MALLOC
 175
 176 /* Crude home-grown routines for debugging some malloc-related
 177    problems.  Featured:
 178
 179    * Counting the number of malloc and free invocations, and reporting
 180      the "balance", i.e. how many times more malloc was called than it
 181      was the case with free.
 182
 183    * Making malloc store its entry into a simple array and free remove
 184      stuff from that array.  At the end, print the pointers which have
 185      not been freed, along with the source file and the line number.
 186      This also has the side-effect of detecting freeing memory that
 187      was never allocated.
 188
 189    Note that this kind of memory leak checking strongly depends on
 190    every malloc() being followed by a free(), even if the program is
 191    about to finish.  Wget is careful to free the data structure it
 192    allocated in init.c.  */
 193
/* Counters bumped by the *_debug allocation and free wrappers.  */
static int malloc_count, free_count;

/* Table of tracked allocations.  A slot whose `ptr' is NULL is free;
   an occupied slot records the pointer together with the source file
   and line passed to the wrapper that registered it.  */
static struct {
  char *ptr;
  const char *file;
  int line;
} malloc_debug[100000];
 201
 202 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 203    which can be a real problem.  It would be nice to use a hash table
 204    for malloc_debug, but the functions in hash.c are not suitable
 205    because they can call malloc() themselves.  Maybe it would work if
 206    the hash table were preallocated to a huge size, and if we set the
 207    rehash threshold to 1.0.  */
 208
 209 /* Register PTR in malloc_debug.  Abort if this is not possible
 210    (presumably due to the number of current allocations exceeding the
 211    size of malloc_debug.)  */
 212
 213 static void
 214 register_ptr (void *ptr, const char *file, int line)
 215 {
 216   int i;
 217   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 218     if (malloc_debug[i].ptr == NULL)
 219       {
 220         malloc_debug[i].ptr = ptr;
 221         malloc_debug[i].file = file;
 222         malloc_debug[i].line = line;
 223         return;
 224       }
 225   abort ();
 226 }
 227
 228 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 229    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 230
 231 static void
 232 unregister_ptr (void *ptr)
 233 {
 234   int i;
 235   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 236     if (malloc_debug[i].ptr == ptr)
 237       {
 238         malloc_debug[i].ptr = NULL;
 239         return;
 240       }
 241   abort ();
 242 }
 243
 244 /* Print the malloc debug stats that can be gathered from the above
 245    information.  Currently this is the count of mallocs, frees, the
 246    difference between the two, and the dump of the contents of
 247    malloc_debug.  The last part are the memory leaks.  */
 248
 249 void
 250 print_malloc_debug_stats (void)
 251 {
 252   int i;
 253   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 254           malloc_count, free_count, malloc_count - free_count);
 255   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 256     if (malloc_debug[i].ptr != NULL)
 257       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 258               malloc_debug[i].file, malloc_debug[i].line);
 259 }
 260
 261 void *
 262 xmalloc_debug (size_t size, const char *source_file, int source_line)
 263 {
 264   void *ptr = xmalloc_real (size);
 265   ++malloc_count;
 266   register_ptr (ptr, source_file, source_line);
 267   return ptr;
 268 }
 269
 270 void
 271 xfree_debug (void *ptr, const char *source_file, int source_line)
 272 {
 273   assert (ptr != NULL);
 274   ++free_count;
 275   unregister_ptr (ptr);
 276   free (ptr);
 277 }
 278
 279 void *
 280 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 281 {
 282   void *newptr = xrealloc_real (ptr, newsize);
 283   if (!ptr)
 284     {
 285       ++malloc_count;
 286       register_ptr (newptr, source_file, source_line);
 287     }
 288   else if (newptr != ptr)
 289     {
 290       unregister_ptr (ptr);
 291       register_ptr (newptr, source_file, source_line);
 292     }
 293   return newptr;
 294 }
 295
 296 char *
 297 xstrdup_debug (const char *s, const char *source_file, int source_line)
 298 {
 299   char *copy = xstrdup_real (s);
 300   ++malloc_count;
 301   register_ptr (copy, source_file, source_line);
 302   return copy;
 303 }
 304
 305 #endif /* DEBUG_MALLOC */
 306 \f
 307 /* Copy the string formed by two pointers (one on the beginning, other
 308    on the char after the last char) to a new, malloc-ed location.
 309    0-terminate it.  */
 310 char *
 311 strdupdelim (const char *beg, const char *end)
 312 {
 313   char *res = (char *)xmalloc (end - beg + 1);
 314   memcpy (res, beg, end - beg);
 315   res[end - beg] = '\0';
 316   return res;
 317 }
 318
 319 /* Parse a string containing comma-separated elements, and return a
 320    vector of char pointers with the elements.  Spaces following the
 321    commas are ignored.  */
 322 char **
 323 sepstring (const char *s)
 324 {
 325   char **res;
 326   const char *p;
 327   int i = 0;
 328
 329   if (!s || !*s)
 330     return NULL;
 331   res = NULL;
 332   p = s;
 333   while (*s)
 334     {
 335       if (*s == ',')
 336         {
 337           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 338           res[i] = strdupdelim (p, s);
 339           res[++i] = NULL;
 340           ++s;
 341           /* Skip the blanks following the ','.  */
 342           while (ISSPACE (*s))
 343             ++s;
 344           p = s;
 345         }
 346       else
 347         ++s;
 348     }
 349   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 350   res[i] = strdupdelim (p, s);
 351   res[i + 1] = NULL;
 352   return res;
 353 }
 354 \f
 355 /* Return pointer to a static char[] buffer in which zero-terminated
 356    string-representation of TM (in form hh:mm:ss) is printed.
 357
 358    If TM is non-NULL, the current time-in-seconds will be stored
 359    there.
 360
 361    (#### This is misleading: one would expect TM would be used instead
 362    of the current time in that case.  This design was probably
 363    influenced by the design time(2), and should be changed at some
 364    points.  No callers use non-NULL TM anyway.)  */
 365
 366 char *
 367 time_str (time_t *tm)
 368 {
 369   static char output[15];
 370   struct tm *ptm;
 371   time_t secs = time (tm);
 372
 373   if (secs == -1)
 374     {
 375       /* In case of error, return the empty string.  Maybe we should
 376          just abort if this happens?  */
 377       *output = '\0';
 378       return output;
 379     }
 380   ptm = localtime (&secs);
 381   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 382   return output;
 383 }
 384
 385 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 386
 387 char *
 388 datetime_str (time_t *tm)
 389 {
 390   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 391   struct tm *ptm;
 392   time_t secs = time (tm);
 393
 394   if (secs == -1)
 395     {
 396       /* In case of error, return the empty string.  Maybe we should
 397          just abort if this happens?  */
 398       *output = '\0';
 399       return output;
 400     }
 401   ptm = localtime (&secs);
 402   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 403            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 404            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 405   return output;
 406 }
 407
 408 /* Returns an error message for ERRNUM.  #### This requires more work.
 409    This function, as well as the whole error system, is very
 410    ill-conceived.  */
 411 const char *
 412 uerrmsg (uerr_t errnum)
 413 {
 414   switch (errnum)
 415     {
 416     case URLUNKNOWN:
 417       return _("Unknown/unsupported protocol");
 418       break;
 419     case URLBADPORT:
 420       return _("Invalid port specification");
 421       break;
 422     case URLBADHOST:
 423       return _("Invalid host name");
 424       break;
 425     default:
 426       abort ();
 427       /* $@#@#$ compiler.  */
 428       return NULL;
 429     }
 430 }
 431 \f
 432 /* The Windows versions of the following two functions are defined in
 433    mswindows.c.  */
 434
 435 #ifndef WINDOWS
 436 void
 437 fork_to_background (void)
 438 {
 439   pid_t pid;
 440   /* Whether we arrange our own version of opt.lfilename here.  */
 441   int changedp = 0;
 442
 443   if (!opt.lfilename)
 444     {
 445       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 446       changedp = 1;
 447     }
 448   pid = fork ();
 449   if (pid < 0)
 450     {
 451       /* parent, error */
 452       perror ("fork");
 453       exit (1);
 454     }
 455   else if (pid != 0)
 456     {
 457       /* parent, no error */
 458       printf (_("Continuing in background.\n"));
 459       if (changedp)
 460         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 461       exit (0);
 462     }
 463   /* child: keep running */
 464 }
 465 #endif /* not WINDOWS */
 466 \f
 467 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 468    in that:
 469         Multple `/'s are collapsed to a single `/'.
 470         Leading `./'s and trailing `/.'s are removed.
 471         Trailing `/'s are removed.
 472         Non-leading `../'s and trailing `..'s are handled by removing
 473         portions of the path.
 474
 475    E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates
 476    from GNU Bash.
 477
 478    Changes for Wget:
 479         Always use '/' as stub_char.
 480         Don't check for local things using canon_stat.
 481         Change the original string instead of strdup-ing.
 482         React correctly when beginning with `./' and `../'.  */
 483 void
 484 path_simplify (char *path)
 485 {
 486   register int i, start, ddot;
 487   char stub_char;
 488
 489   if (!*path)
 490     return;
 491
 492   /*stub_char = (*path == '/') ? '/' : '.';*/
 493   stub_char = '/';
 494
 495   /* Addition: Remove all `./'-s preceding the string.  If `../'-s
 496      precede, put `/' in front and remove them too.  */
 497   i = 0;
 498   ddot = 0;
 499   while (1)
 500     {
 501       if (path[i] == '.' && path[i + 1] == '/')
 502         i += 2;
 503       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 504         {
 505           i += 3;
 506           ddot = 1;
 507         }
 508       else
 509         break;
 510     }
 511   if (i)
 512     strcpy (path, path + i - ddot);
 513
 514   /* Replace single `.' or `..' with `/'.  */
 515   if ((path[0] == '.' && path[1] == '\0')
 516       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 517     {
 518       path[0] = stub_char;
 519       path[1] = '\0';
 520       return;
 521     }
 522   /* Walk along PATH looking for things to compact.  */
 523   i = 0;
 524   while (1)
 525     {
 526       if (!path[i])
 527         break;
 528
 529       while (path[i] && path[i] != '/')
 530         i++;
 531
 532       start = i++;
 533
 534       /* If we didn't find any slashes, then there is nothing left to do.  */
 535       if (!path[start])
 536         break;
 537
 538       /* Handle multiple `/'s in a row.  */
 539       while (path[i] == '/')
 540         i++;
 541
 542       if ((start + 1) != i)
 543         {
 544           strcpy (path + start + 1, path + i);
 545           i = start + 1;
 546         }
 547
 548       /* Check for trailing `/'.  */
 549       if (start && !path[i])
 550         {
 551         zero_last:
 552           path[--i] = '\0';
 553           break;
 554         }
 555
 556       /* Check for `../', `./' or trailing `.' by itself.  */
 557       if (path[i] == '.')
 558         {
 559           /* Handle trailing `.' by itself.  */
 560           if (!path[i + 1])
 561             goto zero_last;
 562
 563           /* Handle `./'.  */
 564           if (path[i + 1] == '/')
 565             {
 566               strcpy (path + i, path + i + 1);
 567               i = (start < 0) ? 0 : start;
 568               continue;
 569             }
 570
 571           /* Handle `../' or trailing `..' by itself.  */
 572           if (path[i + 1] == '.' &&
 573               (path[i + 2] == '/' || !path[i + 2]))
 574             {
 575               while (--start > -1 && path[start] != '/');
 576               strcpy (path + start + 1, path + i + 2);
 577               i = (start < 0) ? 0 : start;
 578               continue;
 579             }
 580         }       /* path == '.' */
 581     } /* while */
 582
 583   if (!*path)
 584     {
 585       *path = stub_char;
 586       path[1] = '\0';
 587     }
 588 }
 589 \f
 590 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 591    specified with TM.  */
 592 void
 593 touch (const char *file, time_t tm)
 594 {
 595 #ifdef HAVE_STRUCT_UTIMBUF
 596   struct utimbuf times;
 597   times.actime = times.modtime = tm;
 598 #else
 599   time_t times[2];
 600   times[0] = times[1] = tm;
 601 #endif
 602
 603   if (utime (file, &times) == -1)
 604     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 605 }
 606
 607 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 608    nothing under MS-Windows.  */
 609 int
 610 remove_link (const char *file)
 611 {
 612   int err = 0;
 613   struct stat st;
 614
 615   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 616     {
 617       DEBUGP (("Unlinking %s (symlink).\n", file));
 618       err = unlink (file);
 619       if (err != 0)
 620         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 621                    file, strerror (errno));
 622     }
 623   return err;
 624 }
 625
 626 /* Does FILENAME exist?  This is quite a lousy implementation, since
 627    it supplies no error codes -- only a yes-or-no answer.  Thus it
 628    will return that a file does not exist if, e.g., the directory is
 629    unreadable.  I don't mind it too much currently, though.  The
 630    proper way should, of course, be to have a third, error state,
 631    other than true/false, but that would introduce uncalled-for
 632    additional complexity to the callers.  */
 633 int
 634 file_exists_p (const char *filename)
 635 {
 636 #ifdef HAVE_ACCESS
 637   return access (filename, F_OK) >= 0;
 638 #else
 639   struct stat buf;
 640   return stat (filename, &buf) >= 0;
 641 #endif
 642 }
 643
 644 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 645    Returns 0 on error.  */
 646 int
 647 file_non_directory_p (const char *path)
 648 {
 649   struct stat buf;
 650   /* Use lstat() rather than stat() so that symbolic links pointing to
 651      directories can be identified correctly.  */
 652   if (lstat (path, &buf) != 0)
 653     return 0;
 654   return S_ISDIR (buf.st_mode) ? 0 : 1;
 655 }
 656
 657 /* Return a unique filename, given a prefix and count */
 658 static char *
 659 unique_name_1 (const char *fileprefix, int count)
 660 {
 661   char *filename;
 662
 663   if (count)
 664     {
 665       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 666       sprintf (filename, "%s.%d", fileprefix, count);
 667     }
 668   else
 669     filename = xstrdup (fileprefix);
 670
 671   if (!file_exists_p (filename))
 672     return filename;
 673   else
 674     {
 675       xfree (filename);
 676       return NULL;
 677     }
 678 }
 679
 680 /* Return a unique file name, based on PREFIX.  */
 681 char *
 682 unique_name (const char *prefix)
 683 {
 684   char *file = NULL;
 685   int count = 0;
 686
 687   while (!file)
 688     file = unique_name_1 (prefix, count++);
 689   return file;
 690 }
 691 \f
 692 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 693    are missing, create them first.  In case any mkdir() call fails,
 694    return its error status.  Returns 0 on successful completion.
 695
 696    The behaviour of this function should be identical to the behaviour
 697    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 698 int
 699 make_directory (const char *directory)
 700 {
 701   int quit = 0;
 702   int i;
 703   char *dir;
 704
 705   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 706      function is unsafe if called with a read-only char *argument.  */
 707   STRDUP_ALLOCA (dir, directory);
 708
 709   /* If the first character of dir is '/', skip it (and thus enable
 710      creation of absolute-pathname directories.  */
 711   for (i = (*dir == '/'); 1; ++i)
 712     {
 713       for (; dir[i] && dir[i] != '/'; i++)
 714         ;
 715       if (!dir[i])
 716         quit = 1;
 717       dir[i] = '\0';
 718       /* Check whether the directory already exists.  */
 719       if (!file_exists_p (dir))
 720         {
 721           if (mkdir (dir, 0777) < 0)
 722             return -1;
 723         }
 724       if (quit)
 725         break;
 726       else
 727         dir[i] = '/';
 728     }
 729   return 0;
 730 }
 731 \f
 732 static int in_acclist PARAMS ((const char *const *, const char *, int));
 733
 734 /* Determine whether a file is acceptable to be followed, according to
 735    lists of patterns to accept/reject.  */
 736 int
 737 acceptable (const char *s)
 738 {
 739   int l = strlen (s);
 740
 741   while (l && s[l] != '/')
 742     --l;
 743   if (s[l] == '/')
 744     s += (l + 1);
 745   if (opt.accepts)
 746     {
 747       if (opt.rejects)
 748         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 749                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 750       else
 751         return in_acclist ((const char *const *)opt.accepts, s, 1);
 752     }
 753   else if (opt.rejects)
 754     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 755   return 1;
 756 }
 757
 758 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 759    `/something', frontcmp() will return 1 only if S2 begins with
 760    `/something'.  Otherwise, 0 is returned.  */
 761 int
 762 frontcmp (const char *s1, const char *s2)
 763 {
 764   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 765   return !*s1;
 766 }
 767
 768 /* Iterate through STRLIST, and return the first element that matches
 769    S, through wildcards or front comparison (as appropriate).  */
 770 static char *
 771 proclist (char **strlist, const char *s, enum accd flags)
 772 {
 773   char **x;
 774
 775   for (x = strlist; *x; x++)
 776     if (has_wildcards_p (*x))
 777       {
 778         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 779           break;
 780       }
 781     else
 782       {
 783         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 784         if (frontcmp (p, s))
 785           break;
 786       }
 787   return *x;
 788 }
 789
 790 /* Returns whether DIRECTORY is acceptable for download, wrt the
 791    include/exclude lists.
 792
 793    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 794    and absolute paths may be freely intermixed.  */
 795 int
 796 accdir (const char *directory, enum accd flags)
 797 {
 798   /* Remove starting '/'.  */
 799   if (flags & ALLABS && *directory == '/')
 800     ++directory;
 801   if (opt.includes)
 802     {
 803       if (!proclist (opt.includes, directory, flags))
 804         return 0;
 805     }
 806   if (opt.excludes)
 807     {
 808       if (proclist (opt.excludes, directory, flags))
 809         return 0;
 810     }
 811   return 1;
 812 }
 813
 814 /* Match the end of STRING against PATTERN.  For instance:
 815
 816    match_backwards ("abc", "bc") -> 1
 817    match_backwards ("abc", "ab") -> 0
 818    match_backwards ("abc", "abc") -> 1 */
 819 static int
 820 match_backwards (const char *string, const char *pattern)
 821 {
 822   int i, j;
 823
 824   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 825     if (string[i] != pattern[j])
 826       break;
 827   /* If the pattern was exhausted, the match was succesful.  */
 828   if (j == -1)
 829     return 1;
 830   else
 831     return 0;
 832 }
 833
 834 /* Checks whether string S matches each element of ACCEPTS.  A list
 835    element are matched either with fnmatch() or match_backwards(),
 836    according to whether the element contains wildcards or not.
 837
 838    If the BACKWARD is 0, don't do backward comparison -- just compare
 839    them normally.  */
 840 static int
 841 in_acclist (const char *const *accepts, const char *s, int backward)
 842 {
 843   for (; *accepts; accepts++)
 844     {
 845       if (has_wildcards_p (*accepts))
 846         {
 847           /* fnmatch returns 0 if the pattern *does* match the
 848              string.  */
 849           if (fnmatch (*accepts, s, 0) == 0)
 850             return 1;
 851         }
 852       else
 853         {
 854           if (backward)
 855             {
 856               if (match_backwards (s, *accepts))
 857                 return 1;
 858             }
 859           else
 860             {
 861               if (!strcmp (s, *accepts))
 862                 return 1;
 863             }
 864         }
 865     }
 866   return 0;
 867 }
 868
 869 /* Return the malloc-ed suffix of STR.  For instance:
 870    suffix ("foo.bar")       -> "bar"
 871    suffix ("foo.bar.baz")   -> "baz"
 872    suffix ("/foo/bar")      -> NULL
 873    suffix ("/foo.bar/baz")  -> NULL  */
 874 char *
 875 suffix (const char *str)
 876 {
 877   int i;
 878
 879   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
 880   if (str[i++] == '.')
 881     return xstrdup (str + i);
 882   else
 883     return NULL;
 884 }
 885
 886 /* Read a line from FP.  The function reallocs the storage as needed
 887    to accomodate for any length of the line.  Reallocs are done
 888    storage exponentially, doubling the storage after each overflow to
 889    minimize the number of calls to realloc() and fgets().  The newline
 890    character at the end of line is retained.
 891
 892    After end-of-file is encountered without anything being read, NULL
 893    is returned.  NULL is also returned on error.  To distinguish
 894    between these two cases, use the stdio function ferror().  */
 895
 896 char *
 897 read_whole_line (FILE *fp)
 898 {
 899   int length = 0;
 900   int bufsize = 81;
 901   char *line = (char *)xmalloc (bufsize);
 902
 903   while (fgets (line + length, bufsize - length, fp))
 904     {
 905       length += strlen (line + length);
 906       assert (length > 0);
 907       if (line[length - 1] == '\n')
 908         break;
 909       /* fgets() guarantees to read the whole line, or to use up the
 910          space we've given it.  We can double the buffer
 911          unconditionally.  */
 912       bufsize <<= 1;
 913       line = xrealloc (line, bufsize);
 914     }
 915   if (length == 0 || ferror (fp))
 916     {
 917       xfree (line);
 918       return NULL;
 919     }
 920   if (length + 1 < bufsize)
 921     /* Relieve the memory from our exponential greediness.  We say
 922        `length + 1' because the terminating \0 is not included in
 923        LENGTH.  We don't need to zero-terminate the string ourselves,
 924        though, because fgets() does that.  */
 925     line = xrealloc (line, length + 1);
 926   return line;
 927 }
 928 \f
/* Read FILE into memory.  A pointer to a newly allocated `struct
   file_memory' is returned; use the struct element `content' to
   access the file contents, and the element `length' to know the file
   length.  `content' is *not* zero-terminated, and you should *not*
   read or write beyond the [0, length) range of characters.  The
   element `mmap_p' is set to 1 when the contents were mapped with
   mmap(), and to 0 when they were read into allocated memory.

   NULL is returned if FILE cannot be opened or if reading it fails.

   After you are done with the file contents, call read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, read_file() either mmap's the file into memory, or
   reads the file into the core using read().

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.  */

struct file_memory *
read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD belongs to stdin, which this function did not open
     and therefore does not close.  */
  int inhibit_close = 0;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = 1;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xmalloc (sizeof (struct file_memory));

#ifdef HAVE_MMAP
  {
    struct stat buf;
    /* Any failure in this block is not fatal; it only means the
       read() loop below is used instead.  */
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  */
#endif /* HAVE_MMAP */

  /* Regular method: read FD chunk by chunk into a growing buffer.
     fm->length counts the bytes stored so far.  */
  fm->length = 0;
  size = 512;                   /* number of bytes fm->content can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      long nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Append to what has been read so far, asking for as much as
         still fits into the buffer.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  Nothing is
       shrunk when the length is 0, so the buffer is then left at its
       allocated size.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* Read error: release everything acquired so far.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}
1049
1050 /* Release the resources held by FM.  Specifically, this calls
1051    munmap() or xfree() on fm->content, depending whether mmap or
1052    malloc/read were used to read in the file.  It also frees the
1053    memory needed to hold the FM structure itself.  */
1054
1055 void
1056 read_file_free (struct file_memory *fm)
1057 {
1058 #ifdef HAVE_MMAP
1059   if (fm->mmap_p)
1060     {
1061       munmap (fm->content, fm->length);
1062     }
1063   else
1064 #endif
1065     {
1066       xfree (fm->content);
1067     }
1068   xfree (fm);
1069 }
1070 \f
1071 /* Free the pointers in a NULL-terminated vector of pointers, then
1072    free the pointer itself.  */
1073 void
1074 free_vec (char **vec)
1075 {
1076   if (vec)
1077     {
1078       char **p = vec;
1079       while (*p)
1080         xfree (*p++);
1081       xfree (vec);
1082     }
1083 }
1084
/* Append vector V2 to vector V1 and return the merged vector.  Both
   vectors are NULL-terminated arrays of pointers.

   V1 is reallocated and V2 is freed, so after the call neither of
   the original pointers may be used; only the return value is valid.
   The pointed-to strings are moved, not copied.

   If V1 is NULL, V2 is returned unchanged; if V2 is NULL, V1 is
   returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int n1, n2;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 holds no elements: nothing to copy, just dispose of it.  */
      xfree (v2);
      return v1;
    }

  /* Count the elements of both vectors, terminators excluded.  */
  for (n1 = 0; v1[n1]; n1++)
    ;
  for (n2 = 0; v2[n2]; n2++)
    ;

  /* Grow V1 to hold both element sets plus one terminator.  The
     elements are `char *', so size the array by the element type.  */
  v1 = (char **)xrealloc (v1, (n1 + n2 + 1) * sizeof (*v1));
  /* Copying n2 + 1 pointers carries V2's NULL terminator along.  */
  memcpy (v1 + n1, v2, (n2 + 1) * sizeof (*v2));
  xfree (v2);
  return v1;
}
1113
1114 /* A set of simple-minded routines to store strings in a linked list.
1115    This used to also be used for searching, but now we have hash
1116    tables for that.  */
1117
1118 /* It's a shame that these simple things like linked lists and hash
1119    tables (see hash.c) need to be implemented over and over again.  It
1120    would be nice to be able to use the routines from glib -- see
1121    www.gtk.org for details.  However, that would make Wget depend on
1122    glib, and I want to avoid dependencies to external libraries for
1123    reasons of convenience and portability (I suspect Wget is more
1124    portable than anything ever written for Gnome).  */
1125
1126 /* Append an element to the list.  If the list has a huge number of
1127    elements, this can get slow because it has to find the list's
1128    ending.  If you think you have to call slist_append in a loop,
1129    think about calling slist_prepend() followed by slist_nreverse().  */
1130
1131 slist *
1132 slist_append (slist *l, const char *s)
1133 {
1134   slist *newel = (slist *)xmalloc (sizeof (slist));
1135   slist *beg = l;
1136
1137   newel->string = xstrdup (s);
1138   newel->next = NULL;
1139
1140   if (!l)
1141     return newel;
1142   /* Find the last element.  */
1143   while (l->next)
1144     l = l->next;
1145   l->next = newel;
1146   return beg;
1147 }
1148
1149 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1150
1151 slist *
1152 slist_prepend (slist *l, const char *s)
1153 {
1154   slist *newel = (slist *)xmalloc (sizeof (slist));
1155   newel->string = xstrdup (s);
1156   newel->next = l;
1157   return newel;
1158 }
1159
1160 /* Destructively reverse L. */
1161
1162 slist *
1163 slist_nreverse (slist *l)
1164 {
1165   slist *prev = NULL;
1166   while (l)
1167     {
1168       slist *next = l->next;
1169       l->next = prev;
1170       prev = l;
1171       l = next;
1172     }
1173   return prev;
1174 }
1175
1176 /* Is there a specific entry in the list?  */
1177 int
1178 slist_contains (slist *l, const char *s)
1179 {
1180   for (; l; l = l->next)
1181     if (!strcmp (l->string, s))
1182       return 1;
1183   return 0;
1184 }
1185
1186 /* Free the whole slist.  */
1187 void
1188 slist_free (slist *l)
1189 {
1190   while (l)
1191     {
1192       slist *n = l->next;
1193       xfree (l->string);
1194       xfree (l);
1195       l = n;
1196     }
1197 }
1198 \f
1199 /* Sometimes it's useful to create "sets" of strings, i.e. special
1200    hash tables where you want to store strings as keys and merely
1201    query for their existence.  Here is a set of utility routines that
1202    makes that transparent.  */
1203
/* Add string S to the set HT.  The set stores its own copy of S,
   made with xstrdup.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Insert only when S is not a member yet.  Skipping existing
     elements avoids having to free() the old key and strdup() a new
     one.

     The value stored with every key is the string literal "1": an
     arbitrary but clear marker that costs no extra memory, since all
     entries of all `set' hash tables share the same literal.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1219
/* Report whether string S is a member of the set HT.  This is a plain
   synonym for hash_table_contains(): the arguments are passed through
   unchanged and its result is returned as is.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  return hash_table_contains (ht, s);
}
1227
1228 static int
1229 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1230 {
1231   xfree (key);
1232   return 0;
1233 }
1234
1235 void
1236 string_set_free (struct hash_table *ht)
1237 {
1238   hash_table_map (ht, string_set_free_mapper, NULL);
1239   hash_table_destroy (ht);
1240 }
1241
1242 static int
1243 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1244 {
1245   xfree (key);
1246   xfree (value);
1247   return 0;
1248 }
1249
1250 /* Another utility function: call free() on all keys and values of HT.  */
1251
1252 void
1253 free_keys_and_values (struct hash_table *ht)
1254 {
1255   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1256 }
1257
1258 \f
/* Engine for legible and legible_very_long; this function works on
   strings.  REPR is the decimal representation of a number, with an
   optional leading `-'.  The result is the same text with a `,'
   inserted between groups of three characters, counted from the
   right, e.g. "1234567" -> "1,234,567".

   The returned pointer refers to a static buffer that is overwritten
   by the next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *digits = repr;
  size_t len, pos;

  /* A minus sign is copied through and excluded from the grouping.  */
  if (*digits == '-')
    {
      *dst++ = '-';
      ++digits;
    }

  len = strlen (digits);
  for (pos = 0; pos < len; pos++)
    {
      /* A separator goes in front of every character that starts a
         complete group of three to its right -- except at the very
         beginning of the number.  */
      if (pos != 0 && (len - pos) % 3 == 0)
        *dst++ = ',';
      *dst++ = digits[pos];
    }

  *dst = '\0';
  return outbuf;
}
1296
/* Legible -- return the legibly printed form of the long L, i.e. its
   decimal digits grouped in threes by legible_1.  The result lives
   in legible_1's static buffer and is overwritten by the next
   call.  */
char *
legible (long l)
{
  /* Scratch space for the plain decimal form of L.  */
  char digits[24];

  long_to_string (digits, l);
  return legible_1 (digits);
}
1306
1307 /* Write a string representation of NUMBER into the provided buffer.
1308    We cannot use sprintf() because we cannot be sure whether the
1309    platform supports printing of what we chose for VERY_LONG_TYPE.
1310
1311    Example: Gcc supports `long long' under many platforms, but on many
1312    of those the native libc knows nothing of it and therefore cannot
1313    print it.
1314
1315    How long BUFFER needs to be depends on the platform and the content
1316    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1317    bytes are sufficient.  Using more might be a good idea.
1318
1319    This function does not go through the hoops that long_to_string
1320    goes to because it doesn't aspire to be fast.  (It's called perhaps
1321    once in a Wget run.)  */
1322
1323 static void
1324 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1325 {
1326   int i = 0;
1327   int j;
1328
1329   /* Print the number backwards... */
1330   do
1331     {
1332       buffer[i++] = '0' + number % 10;
1333       number /= 10;
1334     }
1335   while (number);
1336
1337   /* ...and reverse the order of the digits. */
1338   for (j = 0; j < i / 2; j++)
1339     {
1340       char c = buffer[j];
1341       buffer[j] = buffer[i - 1 - j];
1342       buffer[i - 1 - j] = c;
1343     }
1344   buffer[i] = '\0';
1345 }
1346
1347 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1348 char *
1349 legible_very_long (VERY_LONG_TYPE l)
1350 {
1351   char inbuf[128];
1352   /* Print the number into the buffer.  */
1353   very_long_to_string (inbuf, l);
1354   return legible_1 (inbuf);
1355 }
1356
/* Count the characters needed to print the (long) integer A in
   decimal: one per digit, plus one for the `-' sign when A is
   negative.  Zero counts as one digit.  */
int
numdigit (long a)
{
  int res = 1;
  unsigned long magnitude;

  if (a < 0)
    {
      /* Account for the minus sign.  The magnitude is taken in
         unsigned arithmetic because negating LONG_MIN as a signed
         long would overflow.  */
      ++res;
      magnitude = 0UL - (unsigned long) a;
    }
  else
    magnitude = (unsigned long) a;

  /* Every division by ten that leaves something behind means one
     more digit.  */
  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1371
/* Helper macros for long_to_string.  They operate on two locals of
   that function: the output cursor `p' and the not yet printed
   magnitude `n'.  FIGURE is always a power of ten.  */

/* Store the leading digit of N, where FIGURE is the weight of that
   digit.  */
#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
/* Store the leading digit of N, then remove it from N.  */
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

/* DIGITS_<k> prints exactly K digits; its argument is the weight of
   the first of them, i.e. ten to the power of K-1.  */
#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is completely equivalent
   to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.

   The fast path is an unrolled chain: the number of digits is found
   by comparing against powers of ten, and the matching DIGITS_<k>
   macro then emits exactly that many digits without a loop.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  /* Work on the magnitude as an unsigned value.  Negating the most
     negative long in signed arithmetic overflows and would leave a
     negative N, for which the digit macros produce garbage; the
     unsigned negation is well defined and yields the right
     magnitude.  */
  if (number < 0)
    {
      *p++ = '-';
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

/* The helper macros are private to long_to_string; remove them so
   they cannot leak into the rest of the file.  */

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1480 \f
/* Support for timers. */

/* Start from a clean slate: none of the back-end selectors may be
   inherited from elsewhere.  */
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Depending on the OS and availability of gettimeofday(), one and
   only one of the above constants will be defined.  Virtually all
   modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
   use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
   non-Windows systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else  /* neither WINDOWS nor HAVE_GETTIMEOFDAY */
# define TIMER_TIME
#endif

/* A timer records the moment of its last reset, in whatever form the
   selected back-end delivers it.  */
struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  /* Seconds and microseconds as reported by gettimeofday.  */
  long secs;
  long usecs;
#elif defined (TIMER_TIME)
  /* Whole seconds as reported by time.  */
  time_t secs;
#elif defined (TIMER_WINDOWS)
  /* System time converted to a FILETIME value.  */
  ULARGE_INTEGER wintime;
#endif
};
1521
1522 /* Allocate a timer.  It is not legal to do anything with a freshly
1523    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1524
1525 struct wget_timer *
1526 wtimer_allocate (void)
1527 {
1528   struct wget_timer *wt =
1529     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1530   return wt;
1531 }
1532
/* Create a timer that is ready for use: allocate it with
   wtimer_allocate() and start it with wtimer_reset().  Returns the
   new timer.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1542
/* Free the resources associated with the timer WT, which amounts to
   releasing the structure itself with xfree.  Further use of WT is
   prohibited.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1551
/* Reset timer WT by storing the current time in it.  That moment
   becomes the starting point from which wtimer_elapsed() measures
   elapsed milliseconds.  Resetting a previously used timer is
   allowed.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined (TIMER_TIME)
  wt->secs = time (NULL);
#elif defined (TIMER_WINDOWS)
  SYSTEMTIME systime;
  FILETIME filetime;

  /* Fetch the system time and convert it to a FILETIME, whose two
     halves are stored in the timer.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1579
/* Return the number of milliseconds elapsed since the timer WT was
   last reset.  The timer is not modified, so this function may be
   called more than once to get increasingly higher elapsed values.

   Each branch mirrors the corresponding one in wtimer_reset() and is
   selected by the same TIMER_* constant.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Seconds are scaled up and microseconds scaled down to
     milliseconds.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() only has one-second resolution.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* NOTE(review): the division by 10000 presumably converts
     FILETIME's 100-nanosecond units to milliseconds -- confirm.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1609
/* Return the assessed granularity of the timer implementation in use
   (apparently in the same millisecond units wtimer_elapsed()
   reports, given the value 1000 for the one-second time() clock).
   Code that has to deal with "zero" time intervals needs this.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* Granularity of gettimeofday is hugely architecture-dependent.
     However, it appears that on modern machines it is better than
     1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* Not established; assumed to be 1.  */
  return 1;
#endif
}
1634 \f
/* This should probably be at a better place, but it doesn't really
   fit into html-parse.c.  */

/* Return a freshly malloc-ed copy of string S in which the
   characters that are special in HTML are replaced by entities, as
   per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   Every other character is copied unchanged.  The caller owns the
   returned string.  */
char *
html_quote_string (const char *s)
{
  const char *in;
  char *out, *res;
  int quoted_len = 0;

  /* First pass: work out how long the quoted text will be.  */
  for (in = s; *in; in++)
    switch (*in)
      {
      case '&':                 /* `&amp;' */
      case ' ':                 /* `&#32;' */
        quoted_len += 5;
        break;
      case '<':                 /* `&lt;' */
      case '>':                 /* `&gt;' */
        quoted_len += 4;
        break;
      case '\"':                /* `&quot;' */
        quoted_len += 6;
        break;
      default:
        quoted_len += 1;
      }

  res = (char *)xmalloc (quoted_len + 1);

  /* Second pass: emit either the entity or the character itself.  */
  out = res;
  for (in = s; *in; in++)
    {
      const char *entity;

      switch (*in)
        {
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        case '\"':
          entity = "&quot;";
          break;
        case ' ':
          entity = "&#32;";
          break;
        default:
          entity = NULL;
        }

      if (entity)
        {
          while (*entity)
            *out++ = *entity++;
        }
      else
        {
          *out++ = *in;
        }
    }
  *out = '\0';
  return res;
}