sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 #include "wget.h"
  55 #include "utils.h"
  56 #include "fnmatch.h"
  57 #include "hash.h"
  58
  59 #ifndef errno
  60 extern int errno;
  61 #endif
  62
  63 /* This section implements several wrappers around the basic
  64    allocation routines.  This is done for two reasons: first, so that
  65    the callers of these functions need not consistently check for
  66    errors.  If there is not enough virtual memory for running Wget,
  67    something is seriously wrong, and Wget exits with an appropriate
  68    error message.
  69
  70    The second reason why these are useful is that, if DEBUG_MALLOC is
  71    defined, they also provide a handy (if crude) malloc debugging
  72    interface that checks memory leaks.  */
  73
  74 /* Croak the fatal memory error and bail out with non-zero exit
  75    status.  */
  76 static void
  77 memfatal (const char *what)
  78 {
  79   /* HACK: expose save_log_p from log.c, so we can turn it off in
  80      order to prevent saving the log.  Saving the log is dangerous
  81      because logprintf() and logputs() can call malloc(), so this
  82      could infloop.  When logging is turned off, infloop can no longer
  83      happen.
  84
  85      #### This is no longer really necessary because the new routines
  86      in log.c cons only if the line exceeds eighty characters.  But
  87      this can come at the end of a line, so it's OK to be careful.
  88
  89      On a more serious note, it would be good to have a
  90      log_forced_shutdown() routine that exposes this cleanly.  */
  91   extern int save_log_p;
  92
  93   save_log_p = 0;
  94   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  95   exit (1);
  96 }
  97
  98 /* These functions end with _real because they need to be
  99    distinguished from the debugging functions, and from the macros.
 100    Explanation follows:
 101
 102    If memory debugging is not turned on, wget.h defines these:
 103
 104      #define xmalloc xmalloc_real
 105      #define xrealloc xrealloc_real
 106      #define xstrdup xstrdup_real
 107      #define xfree free
 108
 109    In case of memory debugging, the definitions are a bit more
 110    complex, because we want to provide more information, *and* we want
 111    to call the debugging code.  (The former is the reason why xmalloc
 112    and friends need to be macros in the first place.)  Then it looks
 113    like this:
 114
 115      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 116      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 117      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 118      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 119
 120    Each of the *_debug function does its magic and calls the real one.  */
 121
 /* STATIC_IF_DEBUG expands to `static' when DEBUG_MALLOC is defined and
    to nothing otherwise.  */
 #ifndef DEBUG_MALLOC
 # define STATIC_IF_DEBUG
 #else
 # define STATIC_IF_DEBUG static
 #endif
 127
 128 STATIC_IF_DEBUG void *
 129 xmalloc_real (size_t size)
 130 {
 131   void *ptr = malloc (size);
 132   if (!ptr)
 133     memfatal ("malloc");
 134   return ptr;
 135 }
 136
 137 STATIC_IF_DEBUG void *
 138 xrealloc_real (void *ptr, size_t newsize)
 139 {
 140   void *newptr;
 141
 142   /* Not all Un*xes have the feature of realloc() that calling it with
 143      a NULL-pointer is the same as malloc(), but it is easy to
 144      simulate.  */
 145   if (ptr)
 146     newptr = realloc (ptr, newsize);
 147   else
 148     newptr = malloc (newsize);
 149   if (!newptr)
 150     memfatal ("realloc");
 151   return newptr;
 152 }
 153
 154 STATIC_IF_DEBUG char *
 155 xstrdup_real (const char *s)
 156 {
 157   char *copy;
 158
 159 #ifndef HAVE_STRDUP
 160   int l = strlen (s);
 161   copy = malloc (l + 1);
 162   if (!copy)
 163     memfatal ("strdup");
 164   memcpy (copy, s, l + 1);
 165 #else  /* HAVE_STRDUP */
 166   copy = strdup (s);
 167   if (!copy)
 168     memfatal ("strdup");
 169 #endif /* HAVE_STRDUP */
 170
 171   return copy;
 172 }
 173
 174 #ifdef DEBUG_MALLOC
 175
 176 /* Crude home-grown routines for debugging some malloc-related
 177    problems.  Featured:
 178
 179    * Counting the number of malloc and free invocations, and reporting
 180      the "balance", i.e. how many times more malloc was called than it
 181      was the case with free.
 182
 183    * Making malloc store its entry into a simple array and free remove
 184      stuff from that array.  At the end, print the pointers which have
 185      not been freed, along with the source file and the line number.
 186      This also has the side-effect of detecting freeing memory that
 187      was never allocated.
 188
 189    Note that this kind of memory leak checking strongly depends on
 190    every malloc() being followed by a free(), even if the program is
 191    about to finish.  Wget is careful to free the data structure it
 192    allocated in init.c.  */
 193
 /* Number of allocations and number of frees seen by the *_debug
    wrappers.  */
 static int malloc_count;
 static int free_count;

 /* Table of currently registered allocations.  A slot whose `ptr' is
    NULL is unused.  */
 static struct {
   char *ptr;                    /* the registered block */
   const char *file;             /* source file recorded for the block */
   int line;                     /* source line recorded for the block */
 } malloc_debug[100000];
 201
 202 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 203    which can be a real problem.  It would be nice to use a hash table
 204    for malloc_debug, but the functions in hash.c are not suitable
 205    because they can call malloc() themselves.  Maybe it would work if
 206    the hash table were preallocated to a huge size, and if we set the
 207    rehash threshold to 1.0.  */
 208
 209 /* Register PTR in malloc_debug.  Abort if this is not possible
 210    (presumably due to the number of current allocations exceeding the
 211    size of malloc_debug.)  */
 212
 213 static void
 214 register_ptr (void *ptr, const char *file, int line)
 215 {
 216   int i;
 217   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 218     if (malloc_debug[i].ptr == NULL)
 219       {
 220         malloc_debug[i].ptr = ptr;
 221         malloc_debug[i].file = file;
 222         malloc_debug[i].line = line;
 223         return;
 224       }
 225   abort ();
 226 }
 227
 228 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 229    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 230
 231 static void
 232 unregister_ptr (void *ptr)
 233 {
 234   int i;
 235   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 236     if (malloc_debug[i].ptr == ptr)
 237       {
 238         malloc_debug[i].ptr = NULL;
 239         return;
 240       }
 241   abort ();
 242 }
 243
 244 /* Print the malloc debug stats that can be gathered from the above
 245    information.  Currently this is the count of mallocs, frees, the
 246    difference between the two, and the dump of the contents of
 247    malloc_debug.  The last part are the memory leaks.  */
 248
 249 void
 250 print_malloc_debug_stats (void)
 251 {
 252   int i;
 253   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 254           malloc_count, free_count, malloc_count - free_count);
 255   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 256     if (malloc_debug[i].ptr != NULL)
 257       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 258               malloc_debug[i].file, malloc_debug[i].line);
 259 }
 260
 261 void *
 262 xmalloc_debug (size_t size, const char *source_file, int source_line)
 263 {
 264   void *ptr = xmalloc_real (size);
 265   ++malloc_count;
 266   register_ptr (ptr, source_file, source_line);
 267   return ptr;
 268 }
 269
 270 void
 271 xfree_debug (void *ptr, const char *source_file, int source_line)
 272 {
 273   assert (ptr != NULL);
 274   ++free_count;
 275   unregister_ptr (ptr);
 276   free (ptr);
 277 }
 278
 279 void *
 280 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 281 {
 282   void *newptr = xrealloc_real (ptr, newsize);
 283   if (!ptr)
 284     {
 285       ++malloc_count;
 286       register_ptr (newptr, source_file, source_line);
 287     }
 288   else if (newptr != ptr)
 289     {
 290       unregister_ptr (ptr);
 291       register_ptr (newptr, source_file, source_line);
 292     }
 293   return newptr;
 294 }
 295
 296 char *
 297 xstrdup_debug (const char *s, const char *source_file, int source_line)
 298 {
 299   char *copy = xstrdup_real (s);
 300   ++malloc_count;
 301   register_ptr (copy, source_file, source_line);
 302   return copy;
 303 }
 304
 305 #endif /* DEBUG_MALLOC */
 306 \f
 /* Return a newly allocated, zero-terminated copy of the characters in
    the range [BEG, END), i.e. BEG points to the first character and END
    to the character after the last one.  */
 char *
 strdupdelim (const char *beg, const char *end)
 {
   size_t len = end - beg;
   char *copy = (char *)xmalloc (len + 1);

   memcpy (copy, beg, len);
   copy[len] = '\0';
   return copy;
 }
 318
 /* Split the comma-separated string S into its elements and return
    them as a newly allocated, NULL-terminated vector of newly allocated
    strings.  Whitespace directly following a comma is skipped.  Returns
    NULL if S is NULL or empty.

    E.g. "a, b,c" yields { "a", "b", "c", NULL }.  */
 char **
 sepstring (const char *s)
 {
   char **vec = NULL;
   const char *start;            /* beginning of the current element */
   int count = 0;                /* elements stored in VEC so far */

   if (s == NULL || *s == '\0')
     return NULL;

   start = s;
   for (;;)
     {
       /* Advance until the current element ends, either at a comma or
          at the end of the string.  */
       if (*s != ',' && *s != '\0')
         {
           ++s;
           continue;
         }

       /* Room for the new element and the terminating NULL.  */
       vec = (char **)xrealloc (vec, (count + 2) * sizeof (char *));
       vec[count] = strdupdelim (start, s);
       vec[++count] = NULL;

       if (*s == '\0')
         break;

       /* Step over the comma and the blanks following it.  */
       ++s;
       while (ISSPACE (*s))
         ++s;
       start = s;
     }
   return vec;
 }
 354 \f
 /* Return a pointer to a static buffer holding the current local time
    formatted as "hh:mm:ss".  The buffer is overwritten by every call.

    If TM is non-NULL, the current time in seconds is also stored there
    (TM is handed straight to time()).

    (#### This is misleading: one would expect TM to be used instead of
    the current time in that case.  The design was probably influenced
    by time(2) and should be changed at some point.)

    If time() or localtime() fails, the empty string is returned.  */

 char *
 time_str (time_t *tm)
 {
   static char output[15];
   struct tm *ptm;
   time_t secs = time (tm);

   /* localtime() may return NULL; treat that like a failing time()
      instead of dereferencing the result unconditionally.  */
   ptm = (secs == (time_t) -1) ? NULL : localtime (&secs);
   if (ptm == NULL)
     {
       *output = '\0';
       return output;
     }
   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
   return output;
 }
 384
 /* Return a pointer to a static buffer holding the current local date
    and time formatted as "YYYY-MM-DD hh:mm:ss".  The buffer is
    overwritten by every call.

    If TM is non-NULL, the current time in seconds is also stored there
    (TM is handed straight to time()).

    If time() or localtime() fails, the empty string is returned.  */

 char *
 datetime_str (time_t *tm)
 {
   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
   struct tm *ptm;
   time_t secs = time (tm);

   /* localtime() may return NULL; treat that like a failing time()
      instead of dereferencing the result unconditionally.  */
   ptm = (secs == (time_t) -1) ? NULL : localtime (&secs);
   if (ptm == NULL)
     {
       *output = '\0';
       return output;
     }
   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
   return output;
 }
 407 \f
 408 /* The Windows versions of the following two functions are defined in
 409    mswindows.c.  */
 410
 411 #ifndef WINDOWS
 412 void
 413 fork_to_background (void)
 414 {
 415   pid_t pid;
 416   /* Whether we arrange our own version of opt.lfilename here.  */
 417   int changedp = 0;
 418
 419   if (!opt.lfilename)
 420     {
 421       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 422       changedp = 1;
 423     }
 424   pid = fork ();
 425   if (pid < 0)
 426     {
 427       /* parent, error */
 428       perror ("fork");
 429       exit (1);
 430     }
 431   else if (pid != 0)
 432     {
 433       /* parent, no error */
 434       printf (_("Continuing in background.\n"));
 435       if (changedp)
 436         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 437       exit (0);
 438     }
 439   /* child: keep running */
 440 }
 441 #endif /* not WINDOWS */
 442 \f
 /* Return a newly allocated copy of ORIG that has been run through
    path_simplify().  ORIG itself is left unchanged.  */
 char *
 ps (char *orig)
 {
   char *simplified;

   simplified = xstrdup (orig);
   path_simplify (simplified);
   return simplified;
 }
 450
 /* Canonicalize PATH in place.  The result differs from the original
    PATH in that:
         Multiple `/'s are collapsed to a single `/'.
         Leading `./'s are removed; leading `../'s are removed and
         replaced by a single `/'.
         `./' components and a trailing `/.' are removed.
         `../' components and a trailing `..' are handled by removing
         the preceding portion of the path.
         A PATH that is just `.' or `..' becomes `/'.

    E.g. "a/b/c/./../d/.." yields "a/b/" (trailing slashes are kept).
    This function originates from GNU Bash.

    Changes for Wget:
         Always use '/' as stub_char.
         Don't check for local things using canon_stat.
         Change the original string instead of strdup-ing.
         React correctly when beginning with `./' and `../'.
         Don't zip out trailing slashes.

    All in-place shifts use memmove(): source and destination overlap,
    for which strcpy() has undefined behaviour.  */
 void
 path_simplify (char *path)
 {
   register int i, start, ddot;
   char stub_char;

   if (!*path)
     return;

   stub_char = '/';

   /* Addition: Remove all `./'-s preceding the string.  If `../'-s
      precede, put `/' in front and remove them too.  */
   i = 0;
   ddot = 0;
   while (1)
     {
       if (path[i] == '.' && path[i + 1] == '/')
         i += 2;
       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
         {
           i += 3;
           ddot = 1;
         }
       else
         break;
     }
   /* When a `../' was skipped, DDOT backs up by one so that the `/'
      ending the last skipped component is kept as the leading `/'.  */
   if (i)
     memmove (path, path + i - ddot, strlen (path + i - ddot) + 1);

   /* Replace single `.' or `..' with `/'.  */
   if ((path[0] == '.' && path[1] == '\0')
       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
     {
       path[0] = stub_char;
       path[1] = '\0';
       return;
     }
   /* Walk along PATH looking for things to compact.  */
   i = 0;
   while (1)
     {
       if (!path[i])
         break;

       while (path[i] && path[i] != '/')
         i++;

       /* START is the index of the slash (or of the terminating \0),
          I the index of the character following it.  */
       start = i++;

       /* If we didn't find any slashes, then there is nothing left to do.  */
       if (!path[start])
         break;

       /* Handle multiple `/'s in a row.  */
       while (path[i] == '/')
         i++;

       if ((start + 1) != i)
         {
           memmove (path + start + 1, path + i, strlen (path + i) + 1);
           i = start + 1;
         }

       /* Check for `../', `./' or trailing `.' by itself.  */
       if (path[i] == '.')
         {
           /* Handle trailing `.' by itself: cut the path at the slash
              in front of it.  */
           if (!path[i + 1])
             {
               path[--i] = '\0';
               break;
             }

           /* Handle `./': drop the dot and rescan from the slash.  */
           if (path[i + 1] == '/')
             {
               memmove (path + i, path + i + 1, strlen (path + i + 1) + 1);
               i = (start < 0) ? 0 : start;
               continue;
             }

           /* Handle `../' or trailing `..' by itself: back START up to
              the previous slash (or to -1 if there is none) and drop
              everything between it and the end of the `..'.  */
           if (path[i + 1] == '.' &&
               (path[i + 2] == '/' || !path[i + 2]))
             {
               while (--start > -1 && path[start] != '/');
               memmove (path + start + 1, path + i + 2,
                        strlen (path + i + 2) + 1);
               i = (start < 0) ? 0 : start;
               continue;
             }
         }       /* path == '.' */
     } /* while */
 }
 563 \f
 564 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 565    specified with TM.  */
 566 void
 567 touch (const char *file, time_t tm)
 568 {
 569 #ifdef HAVE_STRUCT_UTIMBUF
 570   struct utimbuf times;
 571   times.actime = times.modtime = tm;
 572 #else
 573   time_t times[2];
 574   times[0] = times[1] = tm;
 575 #endif
 576
 577   if (utime (file, &times) == -1)
 578     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 579 }
 580
 581 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 582    nothing under MS-Windows.  */
 583 int
 584 remove_link (const char *file)
 585 {
 586   int err = 0;
 587   struct stat st;
 588
 589   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 590     {
 591       DEBUGP (("Unlinking %s (symlink).\n", file));
 592       err = unlink (file);
 593       if (err != 0)
 594         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 595                    file, strerror (errno));
 596     }
 597   return err;
 598 }
 599
 /* Return 1 if FILENAME exists, 0 otherwise.

    This is a crude yes-or-no answer: any failure of the underlying
    access()/stat() call counts as "does not exist", so e.g. a file in
    an unreadable directory is reported as missing.  A third, "error"
    state would be the proper solution, but it would complicate the
    callers.  */
 int
 file_exists_p (const char *filename)
 {
   int status;
 #ifdef HAVE_ACCESS
   status = access (filename, F_OK);
 #else
   struct stat buf;
   status = stat (filename, &buf);
 #endif
   return status >= 0;
 }
 617
 /* Return 1 if PATH exists and is anything other than a directory.
    Return 0 if PATH is a directory, and also 0 if PATH cannot be
    examined.  */
 int
 file_non_directory_p (const char *path)
 {
   struct stat info;

   /* lstat() rather than stat(), so that a symbolic link pointing to a
      directory is judged as the link itself, not as its target.  */
   if (lstat (path, &info) == 0 && !S_ISDIR (info.st_mode))
     return 1;
   return 0;
 }
 630
 /* Build a candidate file name from FILEPREFIX and COUNT: FILEPREFIX
    itself when COUNT is 0, "FILEPREFIX.COUNT" otherwise.  Return the
    newly allocated name if no such file exists yet; otherwise free it
    and return NULL.  */
 static char *
 unique_name_1 (const char *fileprefix, int count)
 {
   char *candidate;

   if (count == 0)
     candidate = xstrdup (fileprefix);
   else
     {
       /* Prefix, '.', the digits of COUNT, and the terminating \0.  */
       candidate = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
       sprintf (candidate, "%s.%d", fileprefix, count);
     }

   if (file_exists_p (candidate))
     {
       xfree (candidate);
       return NULL;
     }
   return candidate;
 }
 653
 /* Return a newly allocated file name based on PREFIX that does not
    name an existing file.  PREFIX itself is tried first, then
    PREFIX.1, PREFIX.2, and so on.  */
 char *
 unique_name (const char *prefix)
 {
   char *name;
   int count = 0;

   do
     name = unique_name_1 (prefix, count++);
   while (name == NULL);

   return name;
 }
 665 \f
 /* Create DIRECTORY, creating any missing leading pathname components
    first.  Returns 0 on success and -1 as soon as one mkdir() call
    fails.

    The behaviour is meant to match `mkdir -p' on systems where mkdir
    supports the `-p' option.  */
 int
 make_directory (const char *directory)
 {
   char *dir;
   int pos;
   int last;

   /* Work on a private, writable copy, so that the function is safe to
      call with a read-only argument.  */
   STRDUP_ALLOCA (dir, directory);

   /* Skip a leading '/', so that absolute pathnames work too.  */
   pos = (*dir == '/');

   for (;;)
     {
       /* Find the end of the current component.  */
       while (dir[pos] && dir[pos] != '/')
         ++pos;
       last = !dir[pos];

       /* Temporarily cut the path after this component and create that
          prefix unless it already exists.  */
       dir[pos] = '\0';
       if (!file_exists_p (dir) && mkdir (dir, 0777) < 0)
         return -1;

       if (last)
         return 0;

       /* Restore the separator and move on to the next component.  */
       dir[pos] = '/';
       ++pos;
     }
 }
 705
 /* Merge BASE with FILE and return the result as a newly allocated
    string.  Everything in BASE after its last '/' is replaced by FILE;
    if BASE has no '/', FILE is appended to all of BASE after a '/'.
    For example:

    file_merge ("/foo/bar", "baz")   -> "/foo/baz"
    file_merge ("/foo/bar/", "baz")  -> "/foo/bar/baz"
    file_merge ("foo", "baz")        -> "foo/baz"

    In other words, it's a simpler and gentler version of uri_merge_1.  */

 char *
 file_merge (const char *base, const char *file)
 {
   const char *slash = (const char *)strrchr (base, '/');
   /* Number of characters of BASE that are kept.  */
   size_t keep = slash ? (size_t)(slash - base) : strlen (base);
   /* Kept part, '/', FILE, and the terminating \0.  */
   char *merged = (char *)xmalloc (keep + 1 + strlen (file) + 1);

   memcpy (merged, base, keep);
   merged[keep] = '/';
   strcpy (merged + keep + 1, file);

   return merged;
 }
 729 \f
 730 static int in_acclist PARAMS ((const char *const *, const char *, int));
 731
 732 /* Determine whether a file is acceptable to be followed, according to
 733    lists of patterns to accept/reject.  */
 734 int
 735 acceptable (const char *s)
 736 {
 737   int l = strlen (s);
 738
 739   while (l && s[l] != '/')
 740     --l;
 741   if (s[l] == '/')
 742     s += (l + 1);
 743   if (opt.accepts)
 744     {
 745       if (opt.rejects)
 746         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 747                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 748       else
 749         return in_acclist ((const char *const *)opt.accepts, s, 1);
 750     }
 751   else if (opt.rejects)
 752     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 753   return 1;
 754 }
 755
 /* Return 1 if S2 begins with S1, 0 otherwise.  E.g. if S1 is
    `/something', the result is 1 only for an S2 that starts with
    `/something'.  An empty S1 matches every S2.  */
 int
 frontcmp (const char *s1, const char *s2)
 {
   while (*s1 != '\0')
     {
       /* This also catches S2 ending before S1 does.  */
       if (*s1 != *s2)
         return 0;
       ++s1;
       ++s2;
     }
   return 1;
 }
 765
 766 /* Iterate through STRLIST, and return the first element that matches
 767    S, through wildcards or front comparison (as appropriate).  */
 768 static char *
 769 proclist (char **strlist, const char *s, enum accd flags)
 770 {
 771   char **x;
 772
 773   for (x = strlist; *x; x++)
 774     if (has_wildcards_p (*x))
 775       {
 776         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 777           break;
 778       }
 779     else
 780       {
 781         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 782         if (frontcmp (p, s))
 783           break;
 784       }
 785   return *x;
 786 }
 787
 788 /* Returns whether DIRECTORY is acceptable for download, wrt the
 789    include/exclude lists.
 790
 791    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 792    and absolute paths may be freely intermixed.  */
 793 int
 794 accdir (const char *directory, enum accd flags)
 795 {
 796   /* Remove starting '/'.  */
 797   if (flags & ALLABS && *directory == '/')
 798     ++directory;
 799   if (opt.includes)
 800     {
 801       if (!proclist (opt.includes, directory, flags))
 802         return 0;
 803     }
 804   if (opt.excludes)
 805     {
 806       if (proclist (opt.excludes, directory, flags))
 807         return 0;
 808     }
 809   return 1;
 810 }
 811
 /* Return 1 if STRING ends with PATTERN, 0 otherwise.  For instance:

    match_backwards ("abc", "bc") -> 1
    match_backwards ("abc", "ab") -> 0
    match_backwards ("abc", "abc") -> 1
    match_backwards ("abc", "") -> 1 */
 static int
 match_backwards (const char *string, const char *pattern)
 {
   size_t slen = strlen (string);
   size_t plen = strlen (pattern);

   /* A pattern longer than the string can never be its tail.  */
   if (plen > slen)
     return 0;

   /* Compare PATTERN against the last PLEN characters of STRING.  */
   return memcmp (string + (slen - plen), pattern, plen) == 0;
 }
 831
 /* Return 1 if the string S matches any element of the NULL-terminated
    list ACCEPTS, 0 otherwise.  An element containing wildcards is
    matched with fnmatch(); any other element is compared with
    match_backwards() when BACKWARD is non-zero and with strcmp() when
    BACKWARD is 0.  */
 static int
 in_acclist (const char *const *accepts, const char *s, int backward)
 {
   const char *const *pat;

   for (pat = accepts; *pat != NULL; ++pat)
     {
       int hit;

       if (has_wildcards_p (*pat))
         /* fnmatch returns 0 if the pattern *does* match the
            string.  */
         hit = (fnmatch (*pat, s, 0) == 0);
       else if (backward)
         hit = match_backwards (s, *pat);
       else
         hit = (strcmp (s, *pat) == 0);

       if (hit)
         return 1;
     }
   return 0;
 }
 866
 /* Return a newly allocated copy of the suffix of STR, i.e. of the text
    after the last `.' that is not followed by a `/'.  Returns NULL when
    there is no such `.'.  For instance:
    suffix ("foo.bar")       -> "bar"
    suffix ("foo.bar.baz")   -> "baz"
    suffix ("/foo/bar")      -> NULL
    suffix ("/foo.bar/baz")  -> NULL  */
 char *
 suffix (const char *str)
 {
   int pos = strlen (str);

   /* Scan backwards until a '.' or a '/' is found, or the beginning of
      the string is reached.  */
   while (pos > 0 && str[pos] != '/' && str[pos] != '.')
     --pos;

   if (str[pos] != '.')
     return NULL;
   return xstrdup (str + pos + 1);
 }
 883
 /* Read one line from FP and return it as a newly allocated string.
    The storage is reallocated as needed to accommodate a line of any
    length; it is doubled after each overflow to keep the number of
    calls to realloc() and fgets() small.  The newline character at the
    end of the line is retained.

    NULL is returned when end-of-file is hit before anything was read,
    and also on error.  Use the stdio function ferror() to tell the two
    cases apart.  */

 char *
 read_whole_line (FILE *fp)
 {
   int used = 0;                 /* characters stored so far */
   int capacity = 81;            /* allocated size of LINE */
   char *line = (char *)xmalloc (capacity);

   for (;;)
     {
       if (!fgets (line + used, capacity - used, fp))
         break;
       used += strlen (line + used);
       assert (used > 0);
       if (line[used - 1] == '\n')
         break;
       /* fgets() either reads the whole line or uses up all the space
          it was given, so the buffer can be doubled unconditionally.  */
       capacity *= 2;
       line = xrealloc (line, capacity);
     }

   if (used == 0 || ferror (fp))
     {
       xfree (line);
       return NULL;
     }

   /* Give back the memory the exponential growth left unused.  The
      comparison uses `used + 1' because the terminating \0 is not
      counted in USED.  fgets() has already zero-terminated the
      string.  */
   if (used + 1 < capacity)
     line = xrealloc (line, used + 1);
   return line;
 }
 926 \f
 927 /* Read FILE into memory.  A pointer to `struct file_memory' are
 928    returned; use struct element `content' to access file contents, and
 929    the element `length' to know the file length.  `content' is *not*
 930    zero-terminated, and you should *not* read or write beyond the [0,
 931    length) range of characters.
 932
 933    After you are done with the file contents, call read_file_free to
 934    release the memory.
 935
 936    Depending on the operating system and the type of file that is
 937    being read, read_file() either mmap's the file into memory, or
 938    reads the file into the core using read().
 939
 940    If file is named "-", fileno(stdin) is used for reading instead.
 941    If you want to read from a real file named "-", use "./-" instead.  */
 942
 943 struct file_memory *
 944 read_file (const char *file)
 945 {
 946   int fd;
 947   struct file_memory *fm;
 948   long size;
 949   int inhibit_close = 0;
 950
 951   /* Some magic in the finest tradition of Perl and its kin: if FILE
 952      is "-", just use stdin.  */
 953   if (HYPHENP (file))
 954     {
 955       fd = fileno (stdin);
 956       inhibit_close = 1;
 957       /* Note that we don't inhibit mmap() in this case.  If stdin is
 958          redirected from a regular file, mmap() will still work.  */
 959     }
 960   else
 961     fd = open (file, O_RDONLY);
 962   if (fd < 0)
 963     return NULL;
 964   fm = xmalloc (sizeof (struct file_memory));
 965
 966 #ifdef HAVE_MMAP
 967   {
 968     struct stat buf;
 969     if (fstat (fd, &buf) < 0)
 970       goto mmap_lose;
 971     fm->length = buf.st_size;
 972     /* NOTE: As far as I know, the callers of this function never
 973        modify the file text.  Relying on this would enable us to
 974        specify PROT_READ and MAP_SHARED for a marginal gain in
 975        efficiency, but at some cost to generality.  */
 976     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 977                         MAP_PRIVATE, fd, 0);
 978     if (fm->content == (char *)MAP_FAILED)
 979       goto mmap_lose;
 980     if (!inhibit_close)
 981       close (fd);
 982
 983     fm->mmap_p = 1;
 984     return fm;
 985   }
 986
 987  mmap_lose:
 988   /* The most common reason why mmap() fails is that FD does not point
 989      to a plain file.  However, it's also possible that mmap() doesn't
 990      work for a particular type of file.  Therefore, whenever mmap()
 991      fails, we just fall back to the regular method.  */
 992 #endif /* HAVE_MMAP */
 993
 994   fm->length = 0;
 995   size = 512;                   /* number of bytes fm->contents can
 996                                    hold at any given time. */
 997   fm->content = xmalloc (size);
 998   while (1)
 999     {
1000       long nread;
1001       if (fm->length > size / 2)
1002         {
1003           /* #### I'm not sure whether the whole exponential-growth
1004              thing makes sense with kernel read.  On Linux at least,
1005              read() refuses to read more than 4K from a file at a
1006              single chunk anyway.  But other Unixes might optimize it
1007              better, and it doesn't *hurt* anything, so I'm leaving
1008              it.  */
1009
1010           /* Normally, we grow SIZE exponentially to make the number
1011              of calls to read() and realloc() logarithmic in relation
1012              to file size.  However, read() can read an amount of data
1013              smaller than requested, and it would be unreasonably to
1014              double SIZE every time *something* was read.  Therefore,
1015              we double SIZE only when the length exceeds half of the
1016              entire allocated size.  */
1017           size <<= 1;
1018           fm->content = xrealloc (fm->content, size);
1019         }
1020       nread = read (fd, fm->content + fm->length, size - fm->length);
1021       if (nread > 0)
1022         /* Successful read. */
1023         fm->length += nread;
1024       else if (nread < 0)
1025         /* Error. */
1026         goto lose;
1027       else
1028         /* EOF */
1029         break;
1030     }
1031   if (!inhibit_close)
1032     close (fd);
1033   if (size > fm->length && fm->length != 0)
1034     /* Due to exponential growth of fm->content, the allocated region
1035        might be much larger than what is actually needed.  */
1036     fm->content = xrealloc (fm->content, fm->length);
1037   fm->mmap_p = 0;
1038   return fm;
1039
1040  lose:
1041   if (!inhibit_close)
1042     close (fd);
1043   xfree (fm->content);
1044   xfree (fm);
1045   return NULL;
1046 }
1047
1048 /* Release the resources held by FM.  Specifically, this calls
1049    munmap() or xfree() on fm->content, depending whether mmap or
1050    malloc/read were used to read in the file.  It also frees the
1051    memory needed to hold the FM structure itself.  */
1052
1053 void
1054 read_file_free (struct file_memory *fm)
1055 {
1056 #ifdef HAVE_MMAP
1057   if (fm->mmap_p)
1058     {
1059       munmap (fm->content, fm->length);
1060     }
1061   else
1062 #endif
1063     {
1064       xfree (fm->content);
1065     }
1066   xfree (fm);
1067 }
1068 \f
/* Release a NULL-terminated vector of pointers: every element is
   freed first, then the vector itself.  A NULL VEC is accepted and
   ignored.  */
void
free_vec (char **vec)
{
  char **elt;

  if (!vec)
    return;
  for (elt = vec; *elt; elt++)
    xfree (*elt);
  xfree (vec);
}
1082
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.  V1 is reallocated and the V2
   array is freed (its elements are moved into the result, not
   copied), so neither pointer may be used after the call.  If V1 is
   NULL, V2 is returned unchanged; if V2 is NULL, V1 is returned
   unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to append, just dispose of it.  */
      xfree (v2);
      return v1;
    }
  /* Count the elements of v1 and v2, excluding the terminators.  */
  for (i = 0; v1[i]; i++)
    ;
  for (j = 0; v2[j]; j++)
    ;
  /* Grow v1 to hold both sets of elements plus a single terminator.
     The elements are `char *', so size the array by the element type
     rather than by the type of the vector pointer.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy j elements and v2's terminating NULL over v1's old
     terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1111
1112 /* A set of simple-minded routines to store strings in a linked list.
1113    This used to also be used for searching, but now we have hash
1114    tables for that.  */
1115
1116 /* It's a shame that these simple things like linked lists and hash
1117    tables (see hash.c) need to be implemented over and over again.  It
1118    would be nice to be able to use the routines from glib -- see
1119    www.gtk.org for details.  However, that would make Wget depend on
1120    glib, and I want to avoid dependencies to external libraries for
1121    reasons of convenience and portability (I suspect Wget is more
1122    portable than anything ever written for Gnome).  */
1123
1124 /* Append an element to the list.  If the list has a huge number of
1125    elements, this can get slow because it has to find the list's
1126    ending.  If you think you have to call slist_append in a loop,
1127    think about calling slist_prepend() followed by slist_nreverse().  */
1128
1129 slist *
1130 slist_append (slist *l, const char *s)
1131 {
1132   slist *newel = (slist *)xmalloc (sizeof (slist));
1133   slist *beg = l;
1134
1135   newel->string = xstrdup (s);
1136   newel->next = NULL;
1137
1138   if (!l)
1139     return newel;
1140   /* Find the last element.  */
1141   while (l->next)
1142     l = l->next;
1143   l->next = newel;
1144   return beg;
1145 }
1146
1147 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1148
1149 slist *
1150 slist_prepend (slist *l, const char *s)
1151 {
1152   slist *newel = (slist *)xmalloc (sizeof (slist));
1153   newel->string = xstrdup (s);
1154   newel->next = l;
1155   return newel;
1156 }
1157
1158 /* Destructively reverse L. */
1159
1160 slist *
1161 slist_nreverse (slist *l)
1162 {
1163   slist *prev = NULL;
1164   while (l)
1165     {
1166       slist *next = l->next;
1167       l->next = prev;
1168       prev = l;
1169       l = next;
1170     }
1171   return prev;
1172 }
1173
1174 /* Is there a specific entry in the list?  */
1175 int
1176 slist_contains (slist *l, const char *s)
1177 {
1178   for (; l; l = l->next)
1179     if (!strcmp (l->string, s))
1180       return 1;
1181   return 0;
1182 }
1183
1184 /* Free the whole slist.  */
1185 void
1186 slist_free (slist *l)
1187 {
1188   while (l)
1189     {
1190       slist *n = l->next;
1191       xfree (l->string);
1192       xfree (l);
1193       l = n;
1194     }
1195 }
1196 \f
1197 /* Sometimes it's useful to create "sets" of strings, i.e. special
1198    hash tables where you want to store strings as keys and merely
1199    query for their existence.  Here is a set of utility routines that
1200    makes that transparent.  */
1201
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only store S when it is not a member yet.  Skipping the insert
     for existing members avoids duplicating S and then having to
     free the key that was already stored.  */
  if (!hash_table_contains (ht, s))
    {
      /* The value is irrelevant for a set; the literal "1" is used
         for every entry, so values take up no extra memory.  The key
         is a private copy of S.  */
      hash_table_put (ht, xstrdup (s), "1");
    }
}
1217
/* Membership test for string sets; this simply forwards to
   hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1225
/* Mapper passed to hash_table_map by string_set_free: free the KEY of
   one entry.  The value and the extra argument are not used.  Always
   returns 0.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}
1232
1233 void
1234 string_set_free (struct hash_table *ht)
1235 {
1236   hash_table_map (ht, string_set_free_mapper, NULL);
1237   hash_table_destroy (ht);
1238 }
1239
/* Mapper passed to hash_table_map by free_keys_and_values: free both
   the KEY and the VALUE of one entry.  The extra argument is not
   used.  Always returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}
1247
1248 /* Another utility function: call free() on all keys and values of HT.  */
1249
1250 void
1251 free_keys_and_values (struct hash_table *ht)
1252 {
1253   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1254 }
1255
1256 \f
/* Shared worker for legible and legible_very_long.  REPR is the
   textual form of a number, optionally starting with `-'.  The
   result is the same text with a `,' inserted between every group of
   three characters, counted from the right.  It is stored in a
   static buffer that is overwritten by the next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  const char *src = repr;
  char *dst = outbuf;
  int remaining;

  /* A leading minus sign is copied as is and takes no part in the
     grouping.  */
  if (*src == '-')
    *dst++ = *src++;

  /* Copy the characters one at a time.  A separator follows a
     character whenever the number of characters still to come is a
     non-zero multiple of three.  */
  for (remaining = strlen (src); remaining > 0; remaining--)
    {
      *dst++ = *src++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *dst++ = ',';
    }

  *dst = '\0';
  return outbuf;
}
1294
/* Return L printed with thousands separators.  The result lives in
   the static buffer of legible_1 and is overwritten by the next
   call.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert to plain decimal first, then let legible_1 group it.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1304
1305 /* Write a string representation of NUMBER into the provided buffer.
1306    We cannot use sprintf() because we cannot be sure whether the
1307    platform supports printing of what we chose for VERY_LONG_TYPE.
1308
1309    Example: Gcc supports `long long' under many platforms, but on many
1310    of those the native libc knows nothing of it and therefore cannot
1311    print it.
1312
1313    How long BUFFER needs to be depends on the platform and the content
1314    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1315    bytes are sufficient.  Using more might be a good idea.
1316
1317    This function does not go through the hoops that long_to_string
1318    goes to because it doesn't aspire to be fast.  (It's called perhaps
1319    once in a Wget run.)  */
1320
1321 static void
1322 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1323 {
1324   int i = 0;
1325   int j;
1326
1327   /* Print the number backwards... */
1328   do
1329     {
1330       buffer[i++] = '0' + number % 10;
1331       number /= 10;
1332     }
1333   while (number);
1334
1335   /* ...and reverse the order of the digits. */
1336   for (j = 0; j < i / 2; j++)
1337     {
1338       char c = buffer[j];
1339       buffer[j] = buffer[i - 1 - j];
1340       buffer[i - 1 - j] = c;
1341     }
1342   buffer[i] = '\0';
1343 }
1344
1345 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1346 char *
1347 legible_very_long (VERY_LONG_TYPE l)
1348 {
1349   char inbuf[128];
1350   /* Print the number into the buffer.  */
1351   very_long_to_string (inbuf, l);
1352   return legible_1 (inbuf);
1353 }
1354
/* Return the number of characters needed to print A in base 10: one
   per decimal digit, plus one for the `-' sign when A is negative.
   Zero counts as one digit.  */
int
numdigit (long a)
{
  unsigned long magnitude;
  int res = 1;

  if (a < 0)
    {
      /* Negate in unsigned arithmetic.  `-a' on the signed value
         would overflow when A is LONG_MIN.  */
      magnitude = 0UL - (unsigned long) a;
      ++res;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1369
/* Helper macros for long_to_string.  They rely on two variables of
   the enclosing function: the output cursor `p' and the remaining
   magnitude `n'.  ONE_DIGIT stores the digit found at the decimal
   position FIGURE (a power of ten); ONE_DIGIT_ADVANCE also removes
   that digit from `n'.  DIGITS_<k> prints exactly k digits, given
   FIGURE == 10^(k-1).  */

#define ONE_DIGIT(figure) (*p++ = (char) (n / (figure) + '0'))
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  The output is the same as that
   of `sprintf(buffer, "%ld", number)', but the conversion is done
   with straight-line code selected by the magnitude of NUMBER, which
   is intended to be much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  /* Work on the magnitude as an unsigned value.  Negating NUMBER as a
     signed long would overflow when NUMBER is LONG_MIN; unsigned
     negation is well defined and yields the right magnitude.  */
  if (number < 0)
    {
      *p++ = '-';
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1478 \f
1479 /* Support for timers. */
1480
1481 #undef TIMER_WINDOWS
1482 #undef TIMER_GETTIMEOFDAY
1483 #undef TIMER_TIME
1484
1485 /* Depending on the OS and availability of gettimeofday(), one and
1486    only one of the above constants will be defined.  Virtually all
1487    modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1488    use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
1489    non-Windows systems without gettimeofday.
1490
1491    #### Perhaps we should also support ftime(), which exists on old
1492    BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
1493    C, if memory serves me.)  */
1494
1495 #ifdef WINDOWS
1496 # define TIMER_WINDOWS
1497 #else  /* not WINDOWS */
1498 # ifdef HAVE_GETTIMEOFDAY
1499 #  define TIMER_GETTIMEOFDAY
1500 # else
1501 #  define TIMER_TIME
1502 # endif
1503 #endif /* not WINDOWS */
1504
/* Starting point recorded by wtimer_reset() and later compared with
   the current time by wtimer_elapsed().  Which members exist depends
   on the TIMER_* back-end selected above.  */
struct wget_timer {
#ifdef TIMER_GETTIMEOFDAY
  /* tv_sec and tv_usec as reported by gettimeofday() at reset.  */
  long secs;
  long usecs;
#endif

#ifdef TIMER_TIME
  /* Value of time(NULL) at reset.  */
  time_t secs;
#endif

#ifdef TIMER_WINDOWS
  /* The FILETIME obtained at reset, stored as high and low parts.  */
  ULARGE_INTEGER wintime;
#endif
};
1519
1520 /* Allocate a timer.  It is not legal to do anything with a freshly
1521    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1522
1523 struct wget_timer *
1524 wtimer_allocate (void)
1525 {
1526   struct wget_timer *wt =
1527     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1528   return wt;
1529 }
1530
/* Convenience constructor: allocate a timer, reset it so that it
   starts measuring from now, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer;

  timer = wtimer_allocate ();
  wtimer_reset (timer);
  return timer;
}
1540
/* Release the memory held by timer WT.  WT must not be used after
   this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1549
/* Record the current time in WT.  This becomes the starting point
   against which wtimer_elapsed() measures.  Resetting a timer that
   has already been used is allowed.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined TIMER_GETTIMEOFDAY
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined TIMER_TIME
  wt->secs = time (NULL);
#elif defined TIMER_WINDOWS
  FILETIME ftime;
  SYSTEMTIME systime;

  /* The system time is converted to a FILETIME and stored as its two
     halves.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &ftime);
  wt->wintime.HighPart = ftime.dwHighDateTime;
  wt->wintime.LowPart  = ftime.dwLowDateTime;
#endif
}
1577
/* Return the number of milliseconds elapsed since WT was last reset.
   The function does not modify WT, so it may be called repeatedly to
   obtain increasingly higher elapsed values.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds scaled up to milliseconds, plus the microsecond
     difference scaled down to milliseconds.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() only provides whole seconds.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

  /* Select this branch with TIMER_WINDOWS, the same symbol that
     guards the Windows member of struct wget_timer and the Windows
     code in wtimer_reset().  */
#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME values count 100-nanosecond intervals, so dividing the
     difference by 10000 yields milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1607
/* Return the assessed granularity of the timer implementation in
   use.  Code that has to deal with "zero" time intervals needs to
   know this.  The value is presumably in milliseconds, the unit of
   wtimer_elapsed().  */

long
wtimer_granularity (void)
{
#if defined TIMER_GETTIMEOFDAY
  /* The granularity of gettimeofday depends heavily on the
     architecture, but on modern machines it appears to be better
     than 1ms.  */
  return 1;
#elif defined TIMER_TIME
  /* time() counts whole seconds.  */
  return 1000;
#elif defined TIMER_WINDOWS
  /* Not actually known; assumed to match gettimeofday.  */
  return 1;
#endif
}
1632 \f
1633 /* This should probably be at a better place, but it doesn't really
1634    fit into html-parse.c.  */
1635
/* Return a freshly allocated copy of S in which the characters that
   are special in HTML are replaced by entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   Every other character is copied unchanged.  The result is
   allocated with xmalloc.  */
char *
html_quote_string (const char *s)
{
  const char *in;
  char *out, *res;
  int len = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (in = s; *in; in++)
    switch (*in)
      {
      case '&':
      case ' ':
        len += 5;               /* `&amp;' and `&#32;' */
        break;
      case '<':
      case '>':
        len += 4;               /* `&lt;' and `&gt;' */
        break;
      case '\"':
        len += 6;               /* `&quot;' */
        break;
      default:
        len += 1;
      }

  res = (char *)xmalloc (len + 1);

  /* Second pass: copy S, substituting the entities.  */
  out = res;
  for (in = s; *in; in++)
    {
      const char *entity;

      if (*in == '&')
        entity = "&amp;";
      else if (*in == '<')
        entity = "&lt;";
      else if (*in == '>')
        entity = "&gt;";
      else if (*in == '\"')
        entity = "&quot;";
      else if (*in == ' ')
        entity = "&#32;";
      else
        entity = NULL;

      if (entity == NULL)
        *out++ = *in;
      else
        while (*entity)
          *out++ = *entity++;
    }
  *out = '\0';
  return res;
}