sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
   3
   4 This file is part of Wget.
   5
   6 This program is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9 (at your option) any later version.
  10
  11 This program is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with this program; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  19
  20 #include <config.h>
  21
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #ifdef HAVE_STRING_H
  25 # include <string.h>
  26 #else  /* not HAVE_STRING_H */
  27 # include <strings.h>
  28 #endif /* not HAVE_STRING_H */
  29 #include <sys/types.h>
  30 #ifdef HAVE_UNISTD_H
  31 # include <unistd.h>
  32 #endif
  33 #ifdef HAVE_MMAP
  34 # include <sys/mman.h>
  35 #endif
  36 #ifdef HAVE_PWD_H
  37 # include <pwd.h>
  38 #endif
  39 #include <limits.h>
  40 #ifdef HAVE_UTIME_H
  41 # include <utime.h>
  42 #endif
  43 #ifdef HAVE_SYS_UTIME_H
  44 # include <sys/utime.h>
  45 #endif
  46 #include <errno.h>
  47 #ifdef NeXT
  48 # include <libc.h>              /* for access() */
  49 #endif
  50 #include <fcntl.h>
  51 #include <assert.h>
  52
  53 #include "wget.h"
  54 #include "utils.h"
  55 #include "fnmatch.h"
  56 #include "hash.h"
  57
  58 #ifndef errno
  59 extern int errno;
  60 #endif
  61
  62 /* This section implements several wrappers around the basic
  63    allocation routines.  This is done for two reasons: first, so that
  64    the callers of these functions need not consistently check for
  65    errors.  If there is not enough virtual memory for running Wget,
  66    something is seriously wrong, and Wget exits with an appropriate
  67    error message.
  68
  69    The second reason why these are useful is that, if DEBUG_MALLOC is
  70    defined, they also provide a handy (if crude) malloc debugging
  71    interface that checks memory leaks.  */
  72
  73 /* Croak the fatal memory error and bail out with non-zero exit
  74    status.  */
  75 static void
  76 memfatal (const char *what)
  77 {
  78   /* HACK: expose save_log_p from log.c, so we can turn it off in
  79      order to prevent saving the log.  Saving the log is dangerous
  80      because logprintf() and logputs() can call malloc(), so this
  81      could infloop.  When logging is turned off, infloop can no longer
  82      happen.
  83
  84      #### This is no longer really necessary because the new routines
  85      in log.c cons only if the line exceeds eighty characters.  But
  86      this can come at the end of a line, so it's OK to be careful.
  87
  88      On a more serious note, it would be good to have a
  89      log_forced_shutdown() routine that exposes this cleanly.  */
  90   extern int save_log_p;
  91
  92   save_log_p = 0;
  93   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  94   exit (1);
  95 }
  96
  97 /* These functions end with _real because they need to be
  98    distinguished from the debugging functions, and from the macros.
  99    Explanation follows:
 100
 101    If memory debugging is not turned on, wget.h defines these:
 102
 103      #define xmalloc xmalloc_real
 104      #define xrealloc xrealloc_real
 105      #define xstrdup xstrdup_real
 106      #define xfree free
 107
 108    In case of memory debugging, the definitions are a bit more
 109    complex, because we want to provide more information, *and* we want
 110    to call the debugging code.  (The former is the reason why xmalloc
 111    and friends need to be macros in the first place.)  Then it looks
 112    like this:
 113
 114      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 115      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 116      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 117      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 118
 119    Each of the *_debug function does its magic and calls the real one.  */
 120
 121 #ifdef DEBUG_MALLOC
 122 # define STATIC_IF_DEBUG static
 123 #else
 124 # define STATIC_IF_DEBUG
 125 #endif
 126
 127 STATIC_IF_DEBUG void *
 128 xmalloc_real (size_t size)
 129 {
 130   void *ptr = malloc (size);
 131   if (!ptr)
 132     memfatal ("malloc");
 133   return ptr;
 134 }
 135
 136 STATIC_IF_DEBUG void *
 137 xrealloc_real (void *ptr, size_t newsize)
 138 {
 139   void *newptr;
 140
 141   /* Not all Un*xes have the feature of realloc() that calling it with
 142      a NULL-pointer is the same as malloc(), but it is easy to
 143      simulate.  */
 144   if (ptr)
 145     newptr = realloc (ptr, newsize);
 146   else
 147     newptr = malloc (newsize);
 148   if (!newptr)
 149     memfatal ("realloc");
 150   return newptr;
 151 }
 152
 153 STATIC_IF_DEBUG char *
 154 xstrdup_real (const char *s)
 155 {
 156   char *copy;
 157
 158 #ifndef HAVE_STRDUP
 159   int l = strlen (s);
 160   copy = malloc (l + 1);
 161   if (!copy)
 162     memfatal ("strdup");
 163   memcpy (copy, s, l + 1);
 164 #else  /* HAVE_STRDUP */
 165   copy = strdup (s);
 166   if (!copy)
 167     memfatal ("strdup");
 168 #endif /* HAVE_STRDUP */
 169
 170   return copy;
 171 }
 172
 173 #ifdef DEBUG_MALLOC
 174
 175 /* Crude home-grown routines for debugging some malloc-related
 176    problems.  Featured:
 177
 178    * Counting the number of malloc and free invocations, and reporting
 179      the "balance", i.e. how many times more malloc was called than it
 180      was the case with free.
 181
 182    * Making malloc store its entry into a simple array and free remove
 183      stuff from that array.  At the end, print the pointers which have
 184      not been freed, along with the source file and the line number.
 185      This also has the side-effect of detecting freeing memory that
 186      was never allocated.
 187
 188    Note that this kind of memory leak checking strongly depends on
 189    every malloc() being followed by a free(), even if the program is
 190    about to finish.  Wget is careful to free the data structure it
 191    allocated in init.c.  */
 192
 193 static int malloc_count, free_count;
 194
 195 static struct {
 196   char *ptr;
 197   const char *file;
 198   int line;
 199 } malloc_debug[100000];
 200
 201 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 202    which can be a real problem.  It would be nice to use a hash table
 203    for malloc_debug, but the functions in hash.c are not suitable
 204    because they can call malloc() themselves.  Maybe it would work if
 205    the hash table were preallocated to a huge size, and if we set the
 206    rehash threshold to 1.0.  */
 207
 208 /* Register PTR in malloc_debug.  Abort if this is not possible
 209    (presumably due to the number of current allocations exceeding the
 210    size of malloc_debug.)  */
 211
 212 static void
 213 register_ptr (void *ptr, const char *file, int line)
 214 {
 215   int i;
 216   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 217     if (malloc_debug[i].ptr == NULL)
 218       {
 219         malloc_debug[i].ptr = ptr;
 220         malloc_debug[i].file = file;
 221         malloc_debug[i].line = line;
 222         return;
 223       }
 224   abort ();
 225 }
 226
 227 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 228    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 229
 230 static void
 231 unregister_ptr (void *ptr)
 232 {
 233   int i;
 234   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 235     if (malloc_debug[i].ptr == ptr)
 236       {
 237         malloc_debug[i].ptr = NULL;
 238         return;
 239       }
 240   abort ();
 241 }
 242
 243 /* Print the malloc debug stats that can be gathered from the above
 244    information.  Currently this is the count of mallocs, frees, the
 245    difference between the two, and the dump of the contents of
 246    malloc_debug.  The last part are the memory leaks.  */
 247
 248 void
 249 print_malloc_debug_stats (void)
 250 {
 251   int i;
 252   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 253           malloc_count, free_count, malloc_count - free_count);
 254   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 255     if (malloc_debug[i].ptr != NULL)
 256       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 257               malloc_debug[i].file, malloc_debug[i].line);
 258 }
 259
 260 void *
 261 xmalloc_debug (size_t size, const char *source_file, int source_line)
 262 {
 263   void *ptr = xmalloc_real (size);
 264   ++malloc_count;
 265   register_ptr (ptr, source_file, source_line);
 266   return ptr;
 267 }
 268
 269 void
 270 xfree_debug (void *ptr, const char *source_file, int source_line)
 271 {
 272   assert (ptr != NULL);
 273   ++free_count;
 274   unregister_ptr (ptr);
 275   free (ptr);
 276 }
 277
 278 void *
 279 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 280 {
 281   void *newptr = xrealloc_real (ptr, newsize);
 282   if (!ptr)
 283     {
 284       ++malloc_count;
 285       register_ptr (newptr, source_file, source_line);
 286     }
 287   else if (newptr != ptr)
 288     {
 289       unregister_ptr (ptr);
 290       register_ptr (newptr, source_file, source_line);
 291     }
 292   return newptr;
 293 }
 294
 295 char *
 296 xstrdup_debug (const char *s, const char *source_file, int source_line)
 297 {
 298   char *copy = xstrdup_real (s);
 299   ++malloc_count;
 300   register_ptr (copy, source_file, source_line);
 301   return copy;
 302 }
 303
 304 #endif /* DEBUG_MALLOC */
 305 \f
 306 /* Copy the string formed by two pointers (one on the beginning, other
 307    on the char after the last char) to a new, malloc-ed location.
 308    0-terminate it.  */
 309 char *
 310 strdupdelim (const char *beg, const char *end)
 311 {
 312   char *res = (char *)xmalloc (end - beg + 1);
 313   memcpy (res, beg, end - beg);
 314   res[end - beg] = '\0';
 315   return res;
 316 }
 317
 318 /* Parse a string containing comma-separated elements, and return a
 319    vector of char pointers with the elements.  Spaces following the
 320    commas are ignored.  */
 321 char **
 322 sepstring (const char *s)
 323 {
 324   char **res;
 325   const char *p;
 326   int i = 0;
 327
 328   if (!s || !*s)
 329     return NULL;
 330   res = NULL;
 331   p = s;
 332   while (*s)
 333     {
 334       if (*s == ',')
 335         {
 336           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 337           res[i] = strdupdelim (p, s);
 338           res[++i] = NULL;
 339           ++s;
 340           /* Skip the blanks following the ','.  */
 341           while (ISSPACE (*s))
 342             ++s;
 343           p = s;
 344         }
 345       else
 346         ++s;
 347     }
 348   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 349   res[i] = strdupdelim (p, s);
 350   res[i + 1] = NULL;
 351   return res;
 352 }
 353 \f
 354 /* Return pointer to a static char[] buffer in which zero-terminated
 355    string-representation of TM (in form hh:mm:ss) is printed.
 356
 357    If TM is non-NULL, the current time-in-seconds will be stored
 358    there.
 359
 360    (#### This is misleading: one would expect TM would be used instead
 361    of the current time in that case.  This design was probably
 362    influenced by the design time(2), and should be changed at some
 363    points.  No callers use non-NULL TM anyway.)  */
 364
 365 char *
 366 time_str (time_t *tm)
 367 {
 368   static char output[15];
 369   struct tm *ptm;
 370   time_t secs = time (tm);
 371
 372   if (secs == -1)
 373     {
 374       /* In case of error, return the empty string.  Maybe we should
 375          just abort if this happens?  */
 376       *output = '\0';
 377       return output;
 378     }
 379   ptm = localtime (&secs);
 380   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 381   return output;
 382 }
 383
 384 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 385
 386 char *
 387 datetime_str (time_t *tm)
 388 {
 389   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 390   struct tm *ptm;
 391   time_t secs = time (tm);
 392
 393   if (secs == -1)
 394     {
 395       /* In case of error, return the empty string.  Maybe we should
 396          just abort if this happens?  */
 397       *output = '\0';
 398       return output;
 399     }
 400   ptm = localtime (&secs);
 401   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 402            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 403            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 404   return output;
 405 }
 406
 407 /* Returns an error message for ERRNUM.  #### This requires more work.
 408    This function, as well as the whole error system, is very
 409    ill-conceived.  */
 410 const char *
 411 uerrmsg (uerr_t errnum)
 412 {
 413   switch (errnum)
 414     {
 415     case URLUNKNOWN:
 416       return _("Unknown/unsupported protocol");
 417       break;
 418     case URLBADPORT:
 419       return _("Invalid port specification");
 420       break;
 421     case URLBADHOST:
 422       return _("Invalid host name");
 423       break;
 424     default:
 425       abort ();
 426       /* $@#@#$ compiler.  */
 427       return NULL;
 428     }
 429 }
 430 \f
 431 /* The Windows versions of the following two functions are defined in
 432    mswindows.c.  */
 433
 434 #ifndef WINDOWS
 435 void
 436 fork_to_background (void)
 437 {
 438   pid_t pid;
 439   /* Whether we arrange our own version of opt.lfilename here.  */
 440   int changedp = 0;
 441
 442   if (!opt.lfilename)
 443     {
 444       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 445       changedp = 1;
 446     }
 447   pid = fork ();
 448   if (pid < 0)
 449     {
 450       /* parent, error */
 451       perror ("fork");
 452       exit (1);
 453     }
 454   else if (pid != 0)
 455     {
 456       /* parent, no error */
 457       printf (_("Continuing in background.\n"));
 458       if (changedp)
 459         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 460       exit (0);
 461     }
 462   /* child: keep running */
 463 }
 464 #endif /* not WINDOWS */
 465 \f
 466 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 467    in that:
 468         Multple `/'s are collapsed to a single `/'.
 469         Leading `./'s and trailing `/.'s are removed.
 470         Trailing `/'s are removed.
 471         Non-leading `../'s and trailing `..'s are handled by removing
 472         portions of the path.
 473
 474    E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates
 475    from GNU Bash.
 476
 477    Changes for Wget:
 478         Always use '/' as stub_char.
 479         Don't check for local things using canon_stat.
 480         Change the original string instead of strdup-ing.
 481         React correctly when beginning with `./' and `../'.  */
 482 void
 483 path_simplify (char *path)
 484 {
 485   register int i, start, ddot;
 486   char stub_char;
 487
 488   if (!*path)
 489     return;
 490
 491   /*stub_char = (*path == '/') ? '/' : '.';*/
 492   stub_char = '/';
 493
 494   /* Addition: Remove all `./'-s preceding the string.  If `../'-s
 495      precede, put `/' in front and remove them too.  */
 496   i = 0;
 497   ddot = 0;
 498   while (1)
 499     {
 500       if (path[i] == '.' && path[i + 1] == '/')
 501         i += 2;
 502       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 503         {
 504           i += 3;
 505           ddot = 1;
 506         }
 507       else
 508         break;
 509     }
 510   if (i)
 511     strcpy (path, path + i - ddot);
 512
 513   /* Replace single `.' or `..' with `/'.  */
 514   if ((path[0] == '.' && path[1] == '\0')
 515       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 516     {
 517       path[0] = stub_char;
 518       path[1] = '\0';
 519       return;
 520     }
 521   /* Walk along PATH looking for things to compact.  */
 522   i = 0;
 523   while (1)
 524     {
 525       if (!path[i])
 526         break;
 527
 528       while (path[i] && path[i] != '/')
 529         i++;
 530
 531       start = i++;
 532
 533       /* If we didn't find any slashes, then there is nothing left to do.  */
 534       if (!path[start])
 535         break;
 536
 537       /* Handle multiple `/'s in a row.  */
 538       while (path[i] == '/')
 539         i++;
 540
 541       if ((start + 1) != i)
 542         {
 543           strcpy (path + start + 1, path + i);
 544           i = start + 1;
 545         }
 546
 547       /* Check for trailing `/'.  */
 548       if (start && !path[i])
 549         {
 550         zero_last:
 551           path[--i] = '\0';
 552           break;
 553         }
 554
 555       /* Check for `../', `./' or trailing `.' by itself.  */
 556       if (path[i] == '.')
 557         {
 558           /* Handle trailing `.' by itself.  */
 559           if (!path[i + 1])
 560             goto zero_last;
 561
 562           /* Handle `./'.  */
 563           if (path[i + 1] == '/')
 564             {
 565               strcpy (path + i, path + i + 1);
 566               i = (start < 0) ? 0 : start;
 567               continue;
 568             }
 569
 570           /* Handle `../' or trailing `..' by itself.  */
 571           if (path[i + 1] == '.' &&
 572               (path[i + 2] == '/' || !path[i + 2]))
 573             {
 574               while (--start > -1 && path[start] != '/');
 575               strcpy (path + start + 1, path + i + 2);
 576               i = (start < 0) ? 0 : start;
 577               continue;
 578             }
 579         }       /* path == '.' */
 580     } /* while */
 581
 582   if (!*path)
 583     {
 584       *path = stub_char;
 585       path[1] = '\0';
 586     }
 587 }
 588 \f
 589 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 590    specified with TM.  */
 591 void
 592 touch (const char *file, time_t tm)
 593 {
 594 #ifdef HAVE_STRUCT_UTIMBUF
 595   struct utimbuf times;
 596   times.actime = times.modtime = tm;
 597 #else
 598   time_t times[2];
 599   times[0] = times[1] = tm;
 600 #endif
 601
 602   if (utime (file, &times) == -1)
 603     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 604 }
 605
 606 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 607    nothing under MS-Windows.  */
 608 int
 609 remove_link (const char *file)
 610 {
 611   int err = 0;
 612   struct stat st;
 613
 614   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 615     {
 616       DEBUGP (("Unlinking %s (symlink).\n", file));
 617       err = unlink (file);
 618       if (err != 0)
 619         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 620                    file, strerror (errno));
 621     }
 622   return err;
 623 }
 624
 625 /* Does FILENAME exist?  This is quite a lousy implementation, since
 626    it supplies no error codes -- only a yes-or-no answer.  Thus it
 627    will return that a file does not exist if, e.g., the directory is
 628    unreadable.  I don't mind it too much currently, though.  The
 629    proper way should, of course, be to have a third, error state,
 630    other than true/false, but that would introduce uncalled-for
 631    additional complexity to the callers.  */
 632 int
 633 file_exists_p (const char *filename)
 634 {
 635 #ifdef HAVE_ACCESS
 636   return access (filename, F_OK) >= 0;
 637 #else
 638   struct stat buf;
 639   return stat (filename, &buf) >= 0;
 640 #endif
 641 }
 642
 643 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 644    Returns 0 on error.  */
 645 int
 646 file_non_directory_p (const char *path)
 647 {
 648   struct stat buf;
 649   /* Use lstat() rather than stat() so that symbolic links pointing to
 650      directories can be identified correctly.  */
 651   if (lstat (path, &buf) != 0)
 652     return 0;
 653   return S_ISDIR (buf.st_mode) ? 0 : 1;
 654 }
 655
 656 /* Return a unique filename, given a prefix and count */
 657 static char *
 658 unique_name_1 (const char *fileprefix, int count)
 659 {
 660   char *filename;
 661
 662   if (count)
 663     {
 664       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 665       sprintf (filename, "%s.%d", fileprefix, count);
 666     }
 667   else
 668     filename = xstrdup (fileprefix);
 669
 670   if (!file_exists_p (filename))
 671     return filename;
 672   else
 673     {
 674       xfree (filename);
 675       return NULL;
 676     }
 677 }
 678
 679 /* Return a unique file name, based on PREFIX.  */
 680 char *
 681 unique_name (const char *prefix)
 682 {
 683   char *file = NULL;
 684   int count = 0;
 685
 686   while (!file)
 687     file = unique_name_1 (prefix, count++);
 688   return file;
 689 }
 690 \f
 691 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 692    are missing, create them first.  In case any mkdir() call fails,
 693    return its error status.  Returns 0 on successful completion.
 694
 695    The behaviour of this function should be identical to the behaviour
 696    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 697 int
 698 make_directory (const char *directory)
 699 {
 700   int quit = 0;
 701   int i;
 702   char *dir;
 703
 704   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 705      function is unsafe if called with a read-only char *argument.  */
 706   STRDUP_ALLOCA (dir, directory);
 707
 708   /* If the first character of dir is '/', skip it (and thus enable
 709      creation of absolute-pathname directories.  */
 710   for (i = (*dir == '/'); 1; ++i)
 711     {
 712       for (; dir[i] && dir[i] != '/'; i++)
 713         ;
 714       if (!dir[i])
 715         quit = 1;
 716       dir[i] = '\0';
 717       /* Check whether the directory already exists.  */
 718       if (!file_exists_p (dir))
 719         {
 720           if (mkdir (dir, 0777) < 0)
 721             return -1;
 722         }
 723       if (quit)
 724         break;
 725       else
 726         dir[i] = '/';
 727     }
 728   return 0;
 729 }
 730 \f
 731 static int in_acclist PARAMS ((const char *const *, const char *, int));
 732
 733 /* Determine whether a file is acceptable to be followed, according to
 734    lists of patterns to accept/reject.  */
 735 int
 736 acceptable (const char *s)
 737 {
 738   int l = strlen (s);
 739
 740   while (l && s[l] != '/')
 741     --l;
 742   if (s[l] == '/')
 743     s += (l + 1);
 744   if (opt.accepts)
 745     {
 746       if (opt.rejects)
 747         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 748                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 749       else
 750         return in_acclist ((const char *const *)opt.accepts, s, 1);
 751     }
 752   else if (opt.rejects)
 753     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 754   return 1;
 755 }
 756
 757 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 758    `/something', frontcmp() will return 1 only if S2 begins with
 759    `/something'.  Otherwise, 0 is returned.  */
 760 int
 761 frontcmp (const char *s1, const char *s2)
 762 {
 763   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 764   return !*s1;
 765 }
 766
 767 /* Iterate through STRLIST, and return the first element that matches
 768    S, through wildcards or front comparison (as appropriate).  */
 769 static char *
 770 proclist (char **strlist, const char *s, enum accd flags)
 771 {
 772   char **x;
 773
 774   for (x = strlist; *x; x++)
 775     if (has_wildcards_p (*x))
 776       {
 777         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 778           break;
 779       }
 780     else
 781       {
 782         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 783         if (frontcmp (p, s))
 784           break;
 785       }
 786   return *x;
 787 }
 788
 789 /* Returns whether DIRECTORY is acceptable for download, wrt the
 790    include/exclude lists.
 791
 792    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 793    and absolute paths may be freely intermixed.  */
 794 int
 795 accdir (const char *directory, enum accd flags)
 796 {
 797   /* Remove starting '/'.  */
 798   if (flags & ALLABS && *directory == '/')
 799     ++directory;
 800   if (opt.includes)
 801     {
 802       if (!proclist (opt.includes, directory, flags))
 803         return 0;
 804     }
 805   if (opt.excludes)
 806     {
 807       if (proclist (opt.excludes, directory, flags))
 808         return 0;
 809     }
 810   return 1;
 811 }
 812
 813 /* Match the end of STRING against PATTERN.  For instance:
 814
 815    match_backwards ("abc", "bc") -> 1
 816    match_backwards ("abc", "ab") -> 0
 817    match_backwards ("abc", "abc") -> 1 */
 818 static int
 819 match_backwards (const char *string, const char *pattern)
 820 {
 821   int i, j;
 822
 823   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 824     if (string[i] != pattern[j])
 825       break;
 826   /* If the pattern was exhausted, the match was succesful.  */
 827   if (j == -1)
 828     return 1;
 829   else
 830     return 0;
 831 }
 832
 833 /* Checks whether string S matches each element of ACCEPTS.  A list
 834    element are matched either with fnmatch() or match_backwards(),
 835    according to whether the element contains wildcards or not.
 836
 837    If the BACKWARD is 0, don't do backward comparison -- just compare
 838    them normally.  */
 839 static int
 840 in_acclist (const char *const *accepts, const char *s, int backward)
 841 {
 842   for (; *accepts; accepts++)
 843     {
 844       if (has_wildcards_p (*accepts))
 845         {
 846           /* fnmatch returns 0 if the pattern *does* match the
 847              string.  */
 848           if (fnmatch (*accepts, s, 0) == 0)
 849             return 1;
 850         }
 851       else
 852         {
 853           if (backward)
 854             {
 855               if (match_backwards (s, *accepts))
 856                 return 1;
 857             }
 858           else
 859             {
 860               if (!strcmp (s, *accepts))
 861                 return 1;
 862             }
 863         }
 864     }
 865   return 0;
 866 }
 867
 868 /* Return the malloc-ed suffix of STR.  For instance:
 869    suffix ("foo.bar")       -> "bar"
 870    suffix ("foo.bar.baz")   -> "baz"
 871    suffix ("/foo/bar")      -> NULL
 872    suffix ("/foo.bar/baz")  -> NULL  */
 873 char *
 874 suffix (const char *str)
 875 {
 876   int i;
 877
 878   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
 879   if (str[i++] == '.')
 880     return xstrdup (str + i);
 881   else
 882     return NULL;
 883 }
 884
 885 /* Read a line from FP.  The function reallocs the storage as needed
 886    to accomodate for any length of the line.  Reallocs are done
 887    storage exponentially, doubling the storage after each overflow to
 888    minimize the number of calls to realloc() and fgets().  The newline
 889    character at the end of line is retained.
 890
 891    After end-of-file is encountered without anything being read, NULL
 892    is returned.  NULL is also returned on error.  To distinguish
 893    between these two cases, use the stdio function ferror().  */
 894
 895 char *
 896 read_whole_line (FILE *fp)
 897 {
 898   int length = 0;
 899   int bufsize = 81;
 900   char *line = (char *)xmalloc (bufsize);
 901
 902   while (fgets (line + length, bufsize - length, fp))
 903     {
 904       length += strlen (line + length);
 905       assert (length > 0);
 906       if (line[length - 1] == '\n')
 907         break;
 908       /* fgets() guarantees to read the whole line, or to use up the
 909          space we've given it.  We can double the buffer
 910          unconditionally.  */
 911       bufsize <<= 1;
 912       line = xrealloc (line, bufsize);
 913     }
 914   if (length == 0 || ferror (fp))
 915     {
 916       xfree (line);
 917       return NULL;
 918     }
 919   if (length + 1 < bufsize)
 920     /* Relieve the memory from our exponential greediness.  We say
 921        `length + 1' because the terminating \0 is not included in
 922        LENGTH.  We don't need to zero-terminate the string ourselves,
 923        though, because fgets() does that.  */
 924     line = xrealloc (line, length + 1);
 925   return line;
 926 }
 927 \f
 928 /* Read FILE into memory.  A pointer to `struct file_memory' are
 929    returned; use struct element `content' to access file contents, and
 930    the element `length' to know the file length.  `content' is *not*
 931    zero-terminated, and you should *not* read or write beyond the [0,
 932    length) range of characters.
 933
 934    After you are done with the file contents, call read_file_free to
 935    release the memory.
 936
 937    Depending on the operating system and the type of file that is
 938    being read, read_file() either mmap's the file into memory, or
 939    reads the file into the core using read().
 940
 941    If file is named "-", fileno(stdin) is used for reading instead.
 942    If you want to read from a real file named "-", use "./-" instead.  */
 943
 944 struct file_memory *
 945 read_file (const char *file)
 946 {
 947   int fd;
 948   struct file_memory *fm;
 949   long size;
 950   int inhibit_close = 0;
 951
 952   /* Some magic in the finest tradition of Perl and its kin: if FILE
 953      is "-", just use stdin.  */
 954   if (HYPHENP (file))
 955     {
 956       fd = fileno (stdin);
 957       inhibit_close = 1;
 958       /* Note that we don't inhibit mmap() in this case.  If stdin is
 959          redirected from a regular file, mmap() will still work.  */
 960     }
 961   else
 962     fd = open (file, O_RDONLY);
 963   if (fd < 0)
 964     return NULL;
 965   fm = xmalloc (sizeof (struct file_memory));
 966
 967 #ifdef HAVE_MMAP
 968   {
 969     struct stat buf;
 970     if (fstat (fd, &buf) < 0)
 971       goto mmap_lose;
 972     fm->length = buf.st_size;
 973     /* NOTE: As far as I know, the callers of this function never
 974        modify the file text.  Relying on this would enable us to
 975        specify PROT_READ and MAP_SHARED for a marginal gain in
 976        efficiency, but at some cost to generality.  */
 977     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 978                         MAP_PRIVATE, fd, 0);
 979     if (fm->content == (char *)MAP_FAILED)
 980       goto mmap_lose;
 981     if (!inhibit_close)
 982       close (fd);
 983
 984     fm->mmap_p = 1;
 985     return fm;
 986   }
 987
 988  mmap_lose:
 989   /* The most common reason why mmap() fails is that FD does not point
 990      to a plain file.  However, it's also possible that mmap() doesn't
 991      work for a particular type of file.  Therefore, whenever mmap()
 992      fails, we just fall back to the regular method.  */
 993 #endif /* HAVE_MMAP */
 994
 995   fm->length = 0;
 996   size = 512;                   /* number of bytes fm->contents can
 997                                    hold at any given time. */
 998   fm->content = xmalloc (size);
 999   while (1)
1000     {
1001       long nread;
1002       if (fm->length > size / 2)
1003         {
1004           /* #### I'm not sure whether the whole exponential-growth
1005              thing makes sense with kernel read.  On Linux at least,
1006              read() refuses to read more than 4K from a file at a
1007              single chunk anyway.  But other Unixes might optimize it
1008              better, and it doesn't *hurt* anything, so I'm leaving
1009              it.  */
1010
1011           /* Normally, we grow SIZE exponentially to make the number
1012              of calls to read() and realloc() logarithmic in relation
1013              to file size.  However, read() can read an amount of data
1014              smaller than requested, and it would be unreasonably to
1015              double SIZE every time *something* was read.  Therefore,
1016              we double SIZE only when the length exceeds half of the
1017              entire allocated size.  */
1018           size <<= 1;
1019           fm->content = xrealloc (fm->content, size);
1020         }
1021       nread = read (fd, fm->content + fm->length, size - fm->length);
1022       if (nread > 0)
1023         /* Successful read. */
1024         fm->length += nread;
1025       else if (nread < 0)
1026         /* Error. */
1027         goto lose;
1028       else
1029         /* EOF */
1030         break;
1031     }
1032   if (!inhibit_close)
1033     close (fd);
1034   if (size > fm->length && fm->length != 0)
1035     /* Due to exponential growth of fm->content, the allocated region
1036        might be much larger than what is actually needed.  */
1037     fm->content = xrealloc (fm->content, fm->length);
1038   fm->mmap_p = 0;
1039   return fm;
1040
1041  lose:
1042   if (!inhibit_close)
1043     close (fd);
1044   xfree (fm->content);
1045   xfree (fm);
1046   return NULL;
1047 }
1048
1049 /* Release the resources held by FM.  Specifically, this calls
1050    munmap() or xfree() on fm->content, depending whether mmap or
1051    malloc/read were used to read in the file.  It also frees the
1052    memory needed to hold the FM structure itself.  */
1053
1054 void
1055 read_file_free (struct file_memory *fm)
1056 {
1057 #ifdef HAVE_MMAP
1058   if (fm->mmap_p)
1059     {
1060       munmap (fm->content, fm->length);
1061     }
1062   else
1063 #endif
1064     {
1065       xfree (fm->content);
1066     }
1067   xfree (fm);
1068 }
1069 \f
1070 /* Free the pointers in a NULL-terminated vector of pointers, then
1071    free the pointer itself.  */
1072 void
1073 free_vec (char **vec)
1074 {
1075   if (vec)
1076     {
1077       char **p = vec;
1078       while (*p)
1079         xfree (*p++);
1080       xfree (vec);
1081     }
1082 }
1083
1084 /* Append vector V2 to vector V1.  The function frees V2 and
1085    reallocates V1 (thus you may not use the contents of neither
1086    pointer after the call).  If V1 is NULL, V2 is returned.  */
1087 char **
1088 merge_vecs (char **v1, char **v2)
1089 {
1090   int i, j;
1091
1092   if (!v1)
1093     return v2;
1094   if (!v2)
1095     return v1;
1096   if (!*v2)
1097     {
1098       /* To avoid j == 0 */
1099       xfree (v2);
1100       return v1;
1101     }
1102   /* Count v1.  */
1103   for (i = 0; v1[i]; i++);
1104   /* Count v2.  */
1105   for (j = 0; v2[j]; j++);
1106   /* Reallocate v1.  */
1107   v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1108   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1109   xfree (v2);
1110   return v1;
1111 }
1112
1113 /* A set of simple-minded routines to store strings in a linked list.
1114    This used to also be used for searching, but now we have hash
1115    tables for that.  */
1116
1117 /* It's a shame that these simple things like linked lists and hash
1118    tables (see hash.c) need to be implemented over and over again.  It
1119    would be nice to be able to use the routines from glib -- see
1120    www.gtk.org for details.  However, that would make Wget depend on
1121    glib, and I want to avoid dependencies to external libraries for
1122    reasons of convenience and portability (I suspect Wget is more
1123    portable than anything ever written for Gnome).  */
1124
1125 /* Append an element to the list.  If the list has a huge number of
1126    elements, this can get slow because it has to find the list's
1127    ending.  If you think you have to call slist_append in a loop,
1128    think about calling slist_prepend() followed by slist_nreverse().  */
1129
1130 slist *
1131 slist_append (slist *l, const char *s)
1132 {
1133   slist *newel = (slist *)xmalloc (sizeof (slist));
1134   slist *beg = l;
1135
1136   newel->string = xstrdup (s);
1137   newel->next = NULL;
1138
1139   if (!l)
1140     return newel;
1141   /* Find the last element.  */
1142   while (l->next)
1143     l = l->next;
1144   l->next = newel;
1145   return beg;
1146 }
1147
1148 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1149
1150 slist *
1151 slist_prepend (slist *l, const char *s)
1152 {
1153   slist *newel = (slist *)xmalloc (sizeof (slist));
1154   newel->string = xstrdup (s);
1155   newel->next = l;
1156   return newel;
1157 }
1158
1159 /* Destructively reverse L. */
1160
1161 slist *
1162 slist_nreverse (slist *l)
1163 {
1164   slist *prev = NULL;
1165   while (l)
1166     {
1167       slist *next = l->next;
1168       l->next = prev;
1169       prev = l;
1170       l = next;
1171     }
1172   return prev;
1173 }
1174
1175 /* Is there a specific entry in the list?  */
1176 int
1177 slist_contains (slist *l, const char *s)
1178 {
1179   for (; l; l = l->next)
1180     if (!strcmp (l->string, s))
1181       return 1;
1182   return 0;
1183 }
1184
1185 /* Free the whole slist.  */
1186 void
1187 slist_free (slist *l)
1188 {
1189   while (l)
1190     {
1191       slist *n = l->next;
1192       xfree (l->string);
1193       xfree (l);
1194       l = n;
1195     }
1196 }
1197 \f
1198 /* Sometimes it's useful to create "sets" of strings, i.e. special
1199    hash tables where you want to store strings as keys and merely
1200    query for their existence.  Here is a set of utility routines that
1201    makes that transparent.  */
1202
1203 void
1204 string_set_add (struct hash_table *ht, const char *s)
1205 {
1206   /* First check whether the set element already exists.  If it does,
1207      do nothing so that we don't have to free() the old element and
1208      then strdup() a new one.  */
1209   if (hash_table_contains (ht, s))
1210     return;
1211
1212   /* We use "1" as value.  It provides us a useful and clear arbitrary
1213      value, and it consumes no memory -- the pointers to the same
1214      string "1" will be shared by all the key-value pairs in all `set'
1215      hash tables.  */
1216   hash_table_put (ht, xstrdup (s), "1");
1217 }
1218
1219 /* Synonym for hash_table_contains... */
1220
1221 int
1222 string_set_contains (struct hash_table *ht, const char *s)
1223 {
1224   return hash_table_contains (ht, s);
1225 }
1226
1227 static int
1228 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1229 {
1230   xfree (key);
1231   return 0;
1232 }
1233
1234 void
1235 string_set_free (struct hash_table *ht)
1236 {
1237   hash_table_map (ht, string_set_free_mapper, NULL);
1238   hash_table_destroy (ht);
1239 }
1240
1241 static int
1242 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1243 {
1244   xfree (key);
1245   xfree (value);
1246   return 0;
1247 }
1248
1249 /* Another utility function: call free() on all keys and values of HT.  */
1250
1251 void
1252 free_keys_and_values (struct hash_table *ht)
1253 {
1254   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1255 }
1256
1257 \f
1258 /* Engine for legible and legible_very_long; this function works on
1259    strings.  */
1260
1261 static char *
1262 legible_1 (const char *repr)
1263 {
1264   static char outbuf[128];
1265   int i, i1, mod;
1266   char *outptr;
1267   const char *inptr;
1268
1269   /* Reset the pointers.  */
1270   outptr = outbuf;
1271   inptr = repr;
1272   /* If the number is negative, shift the pointers.  */
1273   if (*inptr == '-')
1274     {
1275       *outptr++ = '-';
1276       ++inptr;
1277     }
1278   /* How many digits before the first separator?  */
1279   mod = strlen (inptr) % 3;
1280   /* Insert them.  */
1281   for (i = 0; i < mod; i++)
1282     *outptr++ = inptr[i];
1283   /* Now insert the rest of them, putting separator before every
1284      third digit.  */
1285   for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1286     {
1287       if (i % 3 == 0 && i1 != 0)
1288         *outptr++ = ',';
1289       *outptr++ = inptr[i1];
1290     }
1291   /* Zero-terminate the string.  */
1292   *outptr = '\0';
1293   return outbuf;
1294 }
1295
1296 /* Legible -- return a static pointer to the legibly printed long.  */
1297 char *
1298 legible (long l)
1299 {
1300   char inbuf[24];
1301   /* Print the number into the buffer.  */
1302   long_to_string (inbuf, l);
1303   return legible_1 (inbuf);
1304 }
1305
1306 /* Write a string representation of NUMBER into the provided buffer.
1307    We cannot use sprintf() because we cannot be sure whether the
1308    platform supports printing of what we chose for VERY_LONG_TYPE.
1309
1310    Example: Gcc supports `long long' under many platforms, but on many
1311    of those the native libc knows nothing of it and therefore cannot
1312    print it.
1313
1314    How long BUFFER needs to be depends on the platform and the content
1315    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1316    bytes are sufficient.  Using more might be a good idea.
1317
1318    This function does not go through the hoops that long_to_string
1319    goes to because it doesn't aspire to be fast.  (It's called perhaps
1320    once in a Wget run.)  */
1321
1322 static void
1323 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1324 {
1325   int i = 0;
1326   int j;
1327
1328   /* Print the number backwards... */
1329   do
1330     {
1331       buffer[i++] = '0' + number % 10;
1332       number /= 10;
1333     }
1334   while (number);
1335
1336   /* ...and reverse the order of the digits. */
1337   for (j = 0; j < i / 2; j++)
1338     {
1339       char c = buffer[j];
1340       buffer[j] = buffer[i - 1 - j];
1341       buffer[i - 1 - j] = c;
1342     }
1343   buffer[i] = '\0';
1344 }
1345
1346 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1347 char *
1348 legible_very_long (VERY_LONG_TYPE l)
1349 {
1350   char inbuf[128];
1351   /* Print the number into the buffer.  */
1352   very_long_to_string (inbuf, l);
1353   return legible_1 (inbuf);
1354 }
1355
1356 /* Count the digits in a (long) integer.  */
1357 int
1358 numdigit (long a)
1359 {
1360   int res = 1;
1361   if (a < 0)
1362     {
1363       a = -a;
1364       ++res;
1365     }
1366   while ((a /= 10) != 0)
1367     ++res;
1368   return res;
1369 }
1370
1371 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1372 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1373
1374 #define DIGITS_1(figure) ONE_DIGIT (figure)
1375 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1376 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1377 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1378 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1379 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1380 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1381 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1382 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1383 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1384
1385 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1386
1387 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1388 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1389 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1390 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1391 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1392 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1393 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1394 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1395 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1396
1397 /* Print NUMBER to BUFFER in base 10.  This is completely equivalent
1398    to `sprintf(buffer, "%ld", number)', only much faster.
1399
1400    The speedup may make a difference in programs that frequently
1401    convert numbers to strings.  Some implementations of sprintf,
1402    particularly the one in GNU libc, have been known to be extremely
1403    slow compared to this function.
1404
1405    BUFFER should accept as many bytes as you expect the number to take
1406    up.  On machines with 64-bit longs the maximum needed size is 24
1407    bytes.  That includes the worst-case digits, the optional `-' sign,
1408    and the trailing \0.  */
1409
1410 void
1411 long_to_string (char *buffer, long number)
1412 {
1413   char *p = buffer;
1414   long n = number;
1415
1416 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1417   /* We are running in a strange or misconfigured environment.  Let
1418      sprintf cope with it.  */
1419   sprintf (buffer, "%ld", n);
1420 #else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1421
1422   if (n < 0)
1423     {
1424       *p++ = '-';
1425       n = -n;
1426     }
1427
1428   if      (n < 10)                   { DIGITS_1 (1); }
1429   else if (n < 100)                  { DIGITS_2 (10); }
1430   else if (n < 1000)                 { DIGITS_3 (100); }
1431   else if (n < 10000)                { DIGITS_4 (1000); }
1432   else if (n < 100000)               { DIGITS_5 (10000); }
1433   else if (n < 1000000)              { DIGITS_6 (100000); }
1434   else if (n < 10000000)             { DIGITS_7 (1000000); }
1435   else if (n < 100000000)            { DIGITS_8 (10000000); }
1436   else if (n < 1000000000)           { DIGITS_9 (100000000); }
1437 #if SIZEOF_LONG == 4
1438   /* ``if (1)'' serves only to preserve editor indentation. */
1439   else if (1)                        { DIGITS_10 (1000000000); }
1440 #else  /* SIZEOF_LONG != 4 */
1441   else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
1442   else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
1443   else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
1444   else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
1445   else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
1446   else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
1447   else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
1448   else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
1449   else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1450   else                               { DIGITS_19 (1000000000000000000L); }
1451 #endif /* SIZEOF_LONG != 4 */
1452
1453   *p = '\0';
1454 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1455 }
1456
1457 #undef ONE_DIGIT
1458 #undef ONE_DIGIT_ADVANCE
1459
1460 #undef DIGITS_1
1461 #undef DIGITS_2
1462 #undef DIGITS_3
1463 #undef DIGITS_4
1464 #undef DIGITS_5
1465 #undef DIGITS_6
1466 #undef DIGITS_7
1467 #undef DIGITS_8
1468 #undef DIGITS_9
1469 #undef DIGITS_10
1470 #undef DIGITS_11
1471 #undef DIGITS_12
1472 #undef DIGITS_13
1473 #undef DIGITS_14
1474 #undef DIGITS_15
1475 #undef DIGITS_16
1476 #undef DIGITS_17
1477 #undef DIGITS_18
1478 #undef DIGITS_19
1479 \f
1480 /* Support for timers. */
1481
1482 #undef TIMER_WINDOWS
1483 #undef TIMER_GETTIMEOFDAY
1484 #undef TIMER_TIME
1485
1486 /* Depending on the OS and availability of gettimeofday(), one and
1487    only one of the above constants will be defined.  Virtually all
1488    modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1489    use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
1490    non-Windows systems without gettimeofday.
1491
1492    #### Perhaps we should also support ftime(), which exists on old
1493    BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
1494    C, if memory serves me.)  */
1495
1496 #ifdef WINDOWS
1497 # define TIMER_WINDOWS
1498 #else  /* not WINDOWS */
1499 # ifdef HAVE_GETTIMEOFDAY
1500 #  define TIMER_GETTIMEOFDAY
1501 # else
1502 #  define TIMER_TIME
1503 # endif
1504 #endif /* not WINDOWS */
1505
1506 struct wget_timer {
1507 #ifdef TIMER_GETTIMEOFDAY
1508   long secs;
1509   long usecs;
1510 #endif
1511
1512 #ifdef TIMER_TIME
1513   time_t secs;
1514 #endif
1515
1516 #ifdef TIMER_WINDOWS
1517   ULARGE_INTEGER wintime;
1518 #endif
1519 };
1520
1521 /* Allocate a timer.  It is not legal to do anything with a freshly
1522    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1523
1524 struct wget_timer *
1525 wtimer_allocate (void)
1526 {
1527   struct wget_timer *wt =
1528     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1529   return wt;
1530 }
1531
1532 /* Allocate a new timer and reset it.  Return the new timer. */
1533
1534 struct wget_timer *
1535 wtimer_new (void)
1536 {
1537   struct wget_timer *wt = wtimer_allocate ();
1538   wtimer_reset (wt);
1539   return wt;
1540 }
1541
1542 /* Free the resources associated with the timer.  Its further use is
1543    prohibited.  */
1544
1545 void
1546 wtimer_delete (struct wget_timer *wt)
1547 {
1548   xfree (wt);
1549 }
1550
1551 /* Reset timer WT.  This establishes the starting point from which
1552    wtimer_elapsed() will return the number of elapsed
1553    milliseconds.  It is allowed to reset a previously used timer.  */
1554
1555 void
1556 wtimer_reset (struct wget_timer *wt)
1557 {
1558 #ifdef TIMER_GETTIMEOFDAY
1559   struct timeval t;
1560   gettimeofday (&t, NULL);
1561   wt->secs  = t.tv_sec;
1562   wt->usecs = t.tv_usec;
1563 #endif
1564
1565 #ifdef TIMER_TIME
1566   wt->secs = time (NULL);
1567 #endif
1568
1569 #ifdef TIMER_WINDOWS
1570   FILETIME ft;
1571   SYSTEMTIME st;
1572   GetSystemTime (&st);
1573   SystemTimeToFileTime (&st, &ft);
1574   wt->wintime.HighPart = ft.dwHighDateTime;
1575   wt->wintime.LowPart  = ft.dwLowDateTime;
1576 #endif
1577 }
1578
1579 /* Return the number of milliseconds elapsed since the timer was last
1580    reset.  It is allowed to call this function more than once to get
1581    increasingly higher elapsed values.  */
1582
1583 long
1584 wtimer_elapsed (struct wget_timer *wt)
1585 {
1586 #ifdef TIMER_GETTIMEOFDAY
1587   struct timeval t;
1588   gettimeofday (&t, NULL);
1589   return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
1590 #endif
1591
1592 #ifdef TIMER_TIME
1593   time_t now = time (NULL);
1594   return 1000 * (now - wt->secs);
1595 #endif
1596
1597 #ifdef WINDOWS
1598   FILETIME ft;
1599   SYSTEMTIME st;
1600   ULARGE_INTEGER uli;
1601   GetSystemTime (&st);
1602   SystemTimeToFileTime (&st, &ft);
1603   uli.HighPart = ft.dwHighDateTime;
1604   uli.LowPart = ft.dwLowDateTime;
1605   return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
1606 #endif
1607 }
1608
1609 /* Return the assessed granularity of the timer implementation.  This
1610    is important for certain code that tries to deal with "zero" time
1611    intervals.  */
1612
1613 long
1614 wtimer_granularity (void)
1615 {
1616 #ifdef TIMER_GETTIMEOFDAY
1617   /* Granularity of gettimeofday is hugely architecture-dependent.
1618      However, it appears that on modern machines it is better than
1619      1ms.  */
1620   return 1;
1621 #endif
1622
1623 #ifdef TIMER_TIME
1624   /* This is clear. */
1625   return 1000;
1626 #endif
1627
1628 #ifdef TIMER_WINDOWS
1629   /* ? */
1630   return 1;
1631 #endif
1632 }
1633 \f
1634 /* This should probably be at a better place, but it doesn't really
1635    fit into html-parse.c.  */
1636
1637 /* The function returns the pointer to the malloc-ed quoted version of
1638    string s.  It will recognize and quote numeric and special graphic
1639    entities, as per RFC1866:
1640
1641    `&' -> `&amp;'
1642    `<' -> `&lt;'
1643    `>' -> `&gt;'
1644    `"' -> `&quot;'
1645    SP  -> `&#32;'
1646
1647    No other entities are recognized or replaced.  */
1648 char *
1649 html_quote_string (const char *s)
1650 {
1651   const char *b = s;
1652   char *p, *res;
1653   int i;
1654
1655   /* Pass through the string, and count the new size.  */
1656   for (i = 0; *s; s++, i++)
1657     {
1658       if (*s == '&')
1659         i += 4;                 /* `amp;' */
1660       else if (*s == '<' || *s == '>')
1661         i += 3;                 /* `lt;' and `gt;' */
1662       else if (*s == '\"')
1663         i += 5;                 /* `quot;' */
1664       else if (*s == ' ')
1665         i += 4;                 /* #32; */
1666     }
1667   res = (char *)xmalloc (i + 1);
1668   s = b;
1669   for (p = res; *s; s++)
1670     {
1671       switch (*s)
1672         {
1673         case '&':
1674           *p++ = '&';
1675           *p++ = 'a';
1676           *p++ = 'm';
1677           *p++ = 'p';
1678           *p++ = ';';
1679           break;
1680         case '<': case '>':
1681           *p++ = '&';
1682           *p++ = (*s == '<' ? 'l' : 'g');
1683           *p++ = 't';
1684           *p++ = ';';
1685           break;
1686         case '\"':
1687           *p++ = '&';
1688           *p++ = 'q';
1689           *p++ = 'u';
1690           *p++ = 'o';
1691           *p++ = 't';
1692           *p++ = ';';
1693           break;
1694         case ' ':
1695           *p++ = '&';
1696           *p++ = '#';
1697           *p++ = '3';
1698           *p++ = '2';
1699           *p++ = ';';
1700           break;
1701         default:
1702           *p++ = *s;
1703         }
1704     }
1705   *p = '\0';
1706   return res;
1707 }