sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53 #ifdef HAVE_SYS_IOCTL_H
  54 # include <sys/ioctl.h>
  55 #endif
  56
  57 #include "wget.h"
  58 #include "utils.h"
  59 #include "fnmatch.h"
  60 #include "hash.h"
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* This section implements several wrappers around the basic
  67    allocation routines.  This is done for two reasons: first, so that
  68    the callers of these functions need not consistently check for
  69    errors.  If there is not enough virtual memory for running Wget,
  70    something is seriously wrong, and Wget exits with an appropriate
  71    error message.
  72
  73    The second reason why these are useful is that, if DEBUG_MALLOC is
  74    defined, they also provide a handy (if crude) malloc debugging
  75    interface that checks memory leaks.  */
  76
  77 /* Croak the fatal memory error and bail out with non-zero exit
  78    status.  */
  79 static void
  80 memfatal (const char *what)
  81 {
  82   /* HACK: expose save_log_p from log.c, so we can turn it off in
  83      order to prevent saving the log.  Saving the log is dangerous
  84      because logprintf() and logputs() can call malloc(), so this
  85      could infloop.  When logging is turned off, infloop can no longer
  86      happen.
  87
  88      #### This is no longer really necessary because the new routines
  89      in log.c cons only if the line exceeds eighty characters.  But
  90      this can come at the end of a line, so it's OK to be careful.
  91
  92      On a more serious note, it would be good to have a
  93      log_forced_shutdown() routine that exposes this cleanly.  */
  94   extern int save_log_p;
  95
  96   save_log_p = 0;
  97   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  98   exit (1);
  99 }
 100
 101 /* These functions end with _real because they need to be
 102    distinguished from the debugging functions, and from the macros.
 103    Explanation follows:
 104
 105    If memory debugging is not turned on, wget.h defines these:
 106
 107      #define xmalloc xmalloc_real
 108      #define xrealloc xrealloc_real
 109      #define xstrdup xstrdup_real
 110      #define xfree free
 111
 112    In case of memory debugging, the definitions are a bit more
 113    complex, because we want to provide more information, *and* we want
 114    to call the debugging code.  (The former is the reason why xmalloc
 115    and friends need to be macros in the first place.)  Then it looks
 116    like this:
 117
 118      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 119      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 120      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 121      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 122
 123    Each of the *_debug function does its magic and calls the real one.  */
 124
 125 #ifdef DEBUG_MALLOC
 126 # define STATIC_IF_DEBUG static
 127 #else
 128 # define STATIC_IF_DEBUG
 129 #endif
 130
 131 STATIC_IF_DEBUG void *
 132 xmalloc_real (size_t size)
 133 {
 134   void *ptr = malloc (size);
 135   if (!ptr)
 136     memfatal ("malloc");
 137   return ptr;
 138 }
 139
 140 STATIC_IF_DEBUG void *
 141 xrealloc_real (void *ptr, size_t newsize)
 142 {
 143   void *newptr;
 144
 145   /* Not all Un*xes have the feature of realloc() that calling it with
 146      a NULL-pointer is the same as malloc(), but it is easy to
 147      simulate.  */
 148   if (ptr)
 149     newptr = realloc (ptr, newsize);
 150   else
 151     newptr = malloc (newsize);
 152   if (!newptr)
 153     memfatal ("realloc");
 154   return newptr;
 155 }
 156
 157 STATIC_IF_DEBUG char *
 158 xstrdup_real (const char *s)
 159 {
 160   char *copy;
 161
 162 #ifndef HAVE_STRDUP
 163   int l = strlen (s);
 164   copy = malloc (l + 1);
 165   if (!copy)
 166     memfatal ("strdup");
 167   memcpy (copy, s, l + 1);
 168 #else  /* HAVE_STRDUP */
 169   copy = strdup (s);
 170   if (!copy)
 171     memfatal ("strdup");
 172 #endif /* HAVE_STRDUP */
 173
 174   return copy;
 175 }
 176
 177 #ifdef DEBUG_MALLOC
 178
 179 /* Crude home-grown routines for debugging some malloc-related
 180    problems.  Featured:
 181
 182    * Counting the number of malloc and free invocations, and reporting
 183      the "balance", i.e. how many times more malloc was called than it
 184      was the case with free.
 185
 186    * Making malloc store its entry into a simple array and free remove
 187      stuff from that array.  At the end, print the pointers which have
 188      not been freed, along with the source file and the line number.
 189      This also has the side-effect of detecting freeing memory that
 190      was never allocated.
 191
 192    Note that this kind of memory leak checking strongly depends on
 193    every malloc() being followed by a free(), even if the program is
 194    about to finish.  Wget is careful to free the data structure it
 195    allocated in init.c.  */
 196
 197 static int malloc_count, free_count;
 198
 199 static struct {
 200   char *ptr;
 201   const char *file;
 202   int line;
 203 } malloc_debug[100000];
 204
 205 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 206    which can be a real problem.  It would be nice to use a hash table
 207    for malloc_debug, but the functions in hash.c are not suitable
 208    because they can call malloc() themselves.  Maybe it would work if
 209    the hash table were preallocated to a huge size, and if we set the
 210    rehash threshold to 1.0.  */
 211
 212 /* Register PTR in malloc_debug.  Abort if this is not possible
 213    (presumably due to the number of current allocations exceeding the
 214    size of malloc_debug.)  */
 215
 216 static void
 217 register_ptr (void *ptr, const char *file, int line)
 218 {
 219   int i;
 220   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 221     if (malloc_debug[i].ptr == NULL)
 222       {
 223         malloc_debug[i].ptr = ptr;
 224         malloc_debug[i].file = file;
 225         malloc_debug[i].line = line;
 226         return;
 227       }
 228   abort ();
 229 }
 230
 231 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 232    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 233
 234 static void
 235 unregister_ptr (void *ptr)
 236 {
 237   int i;
 238   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 239     if (malloc_debug[i].ptr == ptr)
 240       {
 241         malloc_debug[i].ptr = NULL;
 242         return;
 243       }
 244   abort ();
 245 }
 246
 247 /* Print the malloc debug stats that can be gathered from the above
 248    information.  Currently this is the count of mallocs, frees, the
 249    difference between the two, and the dump of the contents of
 250    malloc_debug.  The last part are the memory leaks.  */
 251
 252 void
 253 print_malloc_debug_stats (void)
 254 {
 255   int i;
 256   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 257           malloc_count, free_count, malloc_count - free_count);
 258   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 259     if (malloc_debug[i].ptr != NULL)
 260       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 261               malloc_debug[i].file, malloc_debug[i].line);
 262 }
 263
 264 void *
 265 xmalloc_debug (size_t size, const char *source_file, int source_line)
 266 {
 267   void *ptr = xmalloc_real (size);
 268   ++malloc_count;
 269   register_ptr (ptr, source_file, source_line);
 270   return ptr;
 271 }
 272
 273 void
 274 xfree_debug (void *ptr, const char *source_file, int source_line)
 275 {
 276   assert (ptr != NULL);
 277   ++free_count;
 278   unregister_ptr (ptr);
 279   free (ptr);
 280 }
 281
 282 void *
 283 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 284 {
 285   void *newptr = xrealloc_real (ptr, newsize);
 286   if (!ptr)
 287     {
 288       ++malloc_count;
 289       register_ptr (newptr, source_file, source_line);
 290     }
 291   else if (newptr != ptr)
 292     {
 293       unregister_ptr (ptr);
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   return newptr;
 297 }
 298
 299 char *
 300 xstrdup_debug (const char *s, const char *source_file, int source_line)
 301 {
 302   char *copy = xstrdup_real (s);
 303   ++malloc_count;
 304   register_ptr (copy, source_file, source_line);
 305   return copy;
 306 }
 307
 308 #endif /* DEBUG_MALLOC */
 309 \f
 310 /* Utility function: like xstrdup(), but also lowercases S.  */
 311
 312 char *
 313 xstrdup_lower (const char *s)
 314 {
 315   char *copy = xstrdup (s);
 316   char *p = copy;
 317   for (; *p; p++)
 318     *p = TOLOWER (*p);
 319   return copy;
 320 }
 321
 322 /* Return a count of how many times CHR occurs in STRING. */
 323
 324 int
 325 count_char (const char *string, char chr)
 326 {
 327   const char *p;
 328   int count = 0;
 329   for (p = string; *p; p++)
 330     if (*p == chr)
 331       ++count;
 332   return count;
 333 }
 334
 335 /* Copy the string formed by two pointers (one on the beginning, other
 336    on the char after the last char) to a new, malloc-ed location.
 337    0-terminate it.  */
 338 char *
 339 strdupdelim (const char *beg, const char *end)
 340 {
 341   char *res = (char *)xmalloc (end - beg + 1);
 342   memcpy (res, beg, end - beg);
 343   res[end - beg] = '\0';
 344   return res;
 345 }
 346
 347 /* Parse a string containing comma-separated elements, and return a
 348    vector of char pointers with the elements.  Spaces following the
 349    commas are ignored.  */
 350 char **
 351 sepstring (const char *s)
 352 {
 353   char **res;
 354   const char *p;
 355   int i = 0;
 356
 357   if (!s || !*s)
 358     return NULL;
 359   res = NULL;
 360   p = s;
 361   while (*s)
 362     {
 363       if (*s == ',')
 364         {
 365           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 366           res[i] = strdupdelim (p, s);
 367           res[++i] = NULL;
 368           ++s;
 369           /* Skip the blanks following the ','.  */
 370           while (ISSPACE (*s))
 371             ++s;
 372           p = s;
 373         }
 374       else
 375         ++s;
 376     }
 377   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 378   res[i] = strdupdelim (p, s);
 379   res[i + 1] = NULL;
 380   return res;
 381 }
 382 \f
 383 /* Return pointer to a static char[] buffer in which zero-terminated
 384    string-representation of TM (in form hh:mm:ss) is printed.
 385
 386    If TM is non-NULL, the current time-in-seconds will be stored
 387    there.
 388
 389    (#### This is misleading: one would expect TM would be used instead
 390    of the current time in that case.  This design was probably
 391    influenced by the design time(2), and should be changed at some
 392    points.  No callers use non-NULL TM anyway.)  */
 393
 394 char *
 395 time_str (time_t *tm)
 396 {
 397   static char output[15];
 398   struct tm *ptm;
 399   time_t secs = time (tm);
 400
 401   if (secs == -1)
 402     {
 403       /* In case of error, return the empty string.  Maybe we should
 404          just abort if this happens?  */
 405       *output = '\0';
 406       return output;
 407     }
 408   ptm = localtime (&secs);
 409   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 410   return output;
 411 }
 412
 413 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 414
 415 char *
 416 datetime_str (time_t *tm)
 417 {
 418   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 419   struct tm *ptm;
 420   time_t secs = time (tm);
 421
 422   if (secs == -1)
 423     {
 424       /* In case of error, return the empty string.  Maybe we should
 425          just abort if this happens?  */
 426       *output = '\0';
 427       return output;
 428     }
 429   ptm = localtime (&secs);
 430   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 431            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 432            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 433   return output;
 434 }
 435 \f
 436 /* The Windows versions of the following two functions are defined in
 437    mswindows.c.  */
 438
 439 #ifndef WINDOWS
 440 void
 441 fork_to_background (void)
 442 {
 443   pid_t pid;
 444   /* Whether we arrange our own version of opt.lfilename here.  */
 445   int changedp = 0;
 446
 447   if (!opt.lfilename)
 448     {
 449       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 450       changedp = 1;
 451     }
 452   pid = fork ();
 453   if (pid < 0)
 454     {
 455       /* parent, error */
 456       perror ("fork");
 457       exit (1);
 458     }
 459   else if (pid != 0)
 460     {
 461       /* parent, no error */
 462       printf (_("Continuing in background.\n"));
 463       if (changedp)
 464         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 465       exit (0);
 466     }
 467   /* child: keep running */
 468 }
 469 #endif /* not WINDOWS */
 470 \f
 471 #if 0
 472 /* debug */
 473 char *
 474 ps (char *orig)
 475 {
 476   char *r = xstrdup (orig);
 477   path_simplify (r);
 478   return r;
 479 }
 480 #endif
 481
 482 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 483    in that:
 484         Multple `/'s are collapsed to a single `/'.
 485         Leading `./'s and trailing `/.'s are removed.
 486         Trailing `/'s are removed.
 487         Non-leading `../'s and trailing `..'s are handled by removing
 488         portions of the path.
 489
 490    E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates
 491    from GNU Bash.
 492
 493    Changes for Wget:
 494         Always use '/' as stub_char.
 495         Don't check for local things using canon_stat.
 496         Change the original string instead of strdup-ing.
 497         React correctly when beginning with `./' and `../'.
 498         Don't zip out trailing slashes.  */
 499 int
 500 path_simplify (char *path)
 501 {
 502   register int i, start;
 503   int changes = 0;
 504   char stub_char;
 505
 506   if (!*path)
 507     return 0;
 508
 509   stub_char = '/';
 510
 511   if (path[0] == '/')
 512     /* Preserve initial '/'. */
 513     ++path;
 514
 515   /* Nix out leading `.' or `..' with.  */
 516   if ((path[0] == '.' && path[1] == '\0')
 517       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 518     {
 519       path[0] = '\0';
 520       changes = 1;
 521       return changes;
 522     }
 523
 524   /* Walk along PATH looking for things to compact.  */
 525   i = 0;
 526   while (1)
 527     {
 528       if (!path[i])
 529         break;
 530
 531       while (path[i] && path[i] != '/')
 532         i++;
 533
 534       start = i++;
 535
 536       /* If we didn't find any slashes, then there is nothing left to do.  */
 537       if (!path[start])
 538         break;
 539
 540       /* Handle multiple `/'s in a row.  */
 541       while (path[i] == '/')
 542         i++;
 543
 544       if ((start + 1) != i)
 545         {
 546           strcpy (path + start + 1, path + i);
 547           i = start + 1;
 548           changes = 1;
 549         }
 550
 551       /* Check for `../', `./' or trailing `.' by itself.  */
 552       if (path[i] == '.')
 553         {
 554           /* Handle trailing `.' by itself.  */
 555           if (!path[i + 1])
 556             {
 557               path[--i] = '\0';
 558               changes = 1;
 559               break;
 560             }
 561
 562           /* Handle `./'.  */
 563           if (path[i + 1] == '/')
 564             {
 565               strcpy (path + i, path + i + 1);
 566               i = (start < 0) ? 0 : start;
 567               changes = 1;
 568               continue;
 569             }
 570
 571           /* Handle `../' or trailing `..' by itself.  */
 572           if (path[i + 1] == '.' &&
 573               (path[i + 2] == '/' || !path[i + 2]))
 574             {
 575               while (--start > -1 && path[start] != '/');
 576               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 577               i = (start < 0) ? 0 : start;
 578               changes = 1;
 579               continue;
 580             }
 581         }       /* path == '.' */
 582     } /* while */
 583
 584   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 585   i = 0;
 586   while (1)
 587     {
 588       if (path[i] == '.' && path[i + 1] == '/')
 589         i += 2;
 590       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 591         i += 3;
 592       else
 593         break;
 594     }
 595   if (i)
 596     {
 597       strcpy (path, path + i - 0);
 598       changes = 1;
 599     }
 600
 601   return changes;
 602 }
 603 \f
 604 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 605    specified with TM.  */
 606 void
 607 touch (const char *file, time_t tm)
 608 {
 609 #ifdef HAVE_STRUCT_UTIMBUF
 610   struct utimbuf times;
 611   times.actime = times.modtime = tm;
 612 #else
 613   time_t times[2];
 614   times[0] = times[1] = tm;
 615 #endif
 616
 617   if (utime (file, &times) == -1)
 618     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 619 }
 620
 621 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 622    nothing under MS-Windows.  */
 623 int
 624 remove_link (const char *file)
 625 {
 626   int err = 0;
 627   struct stat st;
 628
 629   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 630     {
 631       DEBUGP (("Unlinking %s (symlink).\n", file));
 632       err = unlink (file);
 633       if (err != 0)
 634         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 635                    file, strerror (errno));
 636     }
 637   return err;
 638 }
 639
 640 /* Does FILENAME exist?  This is quite a lousy implementation, since
 641    it supplies no error codes -- only a yes-or-no answer.  Thus it
 642    will return that a file does not exist if, e.g., the directory is
 643    unreadable.  I don't mind it too much currently, though.  The
 644    proper way should, of course, be to have a third, error state,
 645    other than true/false, but that would introduce uncalled-for
 646    additional complexity to the callers.  */
 647 int
 648 file_exists_p (const char *filename)
 649 {
 650 #ifdef HAVE_ACCESS
 651   return access (filename, F_OK) >= 0;
 652 #else
 653   struct stat buf;
 654   return stat (filename, &buf) >= 0;
 655 #endif
 656 }
 657
 658 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 659    Returns 0 on error.  */
 660 int
 661 file_non_directory_p (const char *path)
 662 {
 663   struct stat buf;
 664   /* Use lstat() rather than stat() so that symbolic links pointing to
 665      directories can be identified correctly.  */
 666   if (lstat (path, &buf) != 0)
 667     return 0;
 668   return S_ISDIR (buf.st_mode) ? 0 : 1;
 669 }
 670
 671 /* Return a unique filename, given a prefix and count */
 672 static char *
 673 unique_name_1 (const char *fileprefix, int count)
 674 {
 675   char *filename;
 676
 677   if (count)
 678     {
 679       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 680       sprintf (filename, "%s.%d", fileprefix, count);
 681     }
 682   else
 683     filename = xstrdup (fileprefix);
 684
 685   if (!file_exists_p (filename))
 686     return filename;
 687   else
 688     {
 689       xfree (filename);
 690       return NULL;
 691     }
 692 }
 693
 694 /* Return a unique file name, based on PREFIX.  */
 695 char *
 696 unique_name (const char *prefix)
 697 {
 698   char *file = NULL;
 699   int count = 0;
 700
 701   while (!file)
 702     file = unique_name_1 (prefix, count++);
 703   return file;
 704 }
 705 \f
 706 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 707    are missing, create them first.  In case any mkdir() call fails,
 708    return its error status.  Returns 0 on successful completion.
 709
 710    The behaviour of this function should be identical to the behaviour
 711    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 712 int
 713 make_directory (const char *directory)
 714 {
 715   int quit = 0;
 716   int i;
 717   char *dir;
 718
 719   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 720      function is unsafe if called with a read-only char *argument.  */
 721   STRDUP_ALLOCA (dir, directory);
 722
 723   /* If the first character of dir is '/', skip it (and thus enable
 724      creation of absolute-pathname directories.  */
 725   for (i = (*dir == '/'); 1; ++i)
 726     {
 727       for (; dir[i] && dir[i] != '/'; i++)
 728         ;
 729       if (!dir[i])
 730         quit = 1;
 731       dir[i] = '\0';
 732       /* Check whether the directory already exists.  */
 733       if (!file_exists_p (dir))
 734         {
 735           if (mkdir (dir, 0777) < 0)
 736             return -1;
 737         }
 738       if (quit)
 739         break;
 740       else
 741         dir[i] = '/';
 742     }
 743   return 0;
 744 }
 745
 746 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 747    should be a file name.  For example, file_merge("/foo/bar", "baz")
 748    will return "/foo/baz".  file_merge("/foo/bar/", "baz") will return
 749    "foo/bar/baz".
 750
 751    In other words, it's a simpler and gentler version of uri_merge_1.  */
 752
 753 char *
 754 file_merge (const char *base, const char *file)
 755 {
 756   char *result;
 757   const char *cut = (const char *)strrchr (base, '/');
 758
 759   if (!cut)
 760     cut = base + strlen (base);
 761
 762   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 763   memcpy (result, base, cut - base);
 764   result[cut - base] = '/';
 765   strcpy (result + (cut - base) + 1, file);
 766
 767   return result;
 768 }
 769 \f
 770 static int in_acclist PARAMS ((const char *const *, const char *, int));
 771
 772 /* Determine whether a file is acceptable to be followed, according to
 773    lists of patterns to accept/reject.  */
 774 int
 775 acceptable (const char *s)
 776 {
 777   int l = strlen (s);
 778
 779   while (l && s[l] != '/')
 780     --l;
 781   if (s[l] == '/')
 782     s += (l + 1);
 783   if (opt.accepts)
 784     {
 785       if (opt.rejects)
 786         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 787                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 788       else
 789         return in_acclist ((const char *const *)opt.accepts, s, 1);
 790     }
 791   else if (opt.rejects)
 792     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 793   return 1;
 794 }
 795
 796 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 797    `/something', frontcmp() will return 1 only if S2 begins with
 798    `/something'.  Otherwise, 0 is returned.  */
 799 int
 800 frontcmp (const char *s1, const char *s2)
 801 {
 802   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 803   return !*s1;
 804 }
 805
 806 /* Iterate through STRLIST, and return the first element that matches
 807    S, through wildcards or front comparison (as appropriate).  */
 808 static char *
 809 proclist (char **strlist, const char *s, enum accd flags)
 810 {
 811   char **x;
 812
 813   for (x = strlist; *x; x++)
 814     if (has_wildcards_p (*x))
 815       {
 816         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 817           break;
 818       }
 819     else
 820       {
 821         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 822         if (frontcmp (p, s))
 823           break;
 824       }
 825   return *x;
 826 }
 827
 828 /* Returns whether DIRECTORY is acceptable for download, wrt the
 829    include/exclude lists.
 830
 831    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 832    and absolute paths may be freely intermixed.  */
 833 int
 834 accdir (const char *directory, enum accd flags)
 835 {
 836   /* Remove starting '/'.  */
 837   if (flags & ALLABS && *directory == '/')
 838     ++directory;
 839   if (opt.includes)
 840     {
 841       if (!proclist (opt.includes, directory, flags))
 842         return 0;
 843     }
 844   if (opt.excludes)
 845     {
 846       if (proclist (opt.excludes, directory, flags))
 847         return 0;
 848     }
 849   return 1;
 850 }
 851
 852 /* Match the end of STRING against PATTERN.  For instance:
 853
 854    match_backwards ("abc", "bc") -> 1
 855    match_backwards ("abc", "ab") -> 0
 856    match_backwards ("abc", "abc") -> 1 */
 857 static int
 858 match_backwards (const char *string, const char *pattern)
 859 {
 860   int i, j;
 861
 862   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 863     if (string[i] != pattern[j])
 864       break;
 865   /* If the pattern was exhausted, the match was succesful.  */
 866   if (j == -1)
 867     return 1;
 868   else
 869     return 0;
 870 }
 871
 872 /* Checks whether string S matches each element of ACCEPTS.  A list
 873    element are matched either with fnmatch() or match_backwards(),
 874    according to whether the element contains wildcards or not.
 875
 876    If the BACKWARD is 0, don't do backward comparison -- just compare
 877    them normally.  */
 878 static int
 879 in_acclist (const char *const *accepts, const char *s, int backward)
 880 {
 881   for (; *accepts; accepts++)
 882     {
 883       if (has_wildcards_p (*accepts))
 884         {
 885           /* fnmatch returns 0 if the pattern *does* match the
 886              string.  */
 887           if (fnmatch (*accepts, s, 0) == 0)
 888             return 1;
 889         }
 890       else
 891         {
 892           if (backward)
 893             {
 894               if (match_backwards (s, *accepts))
 895                 return 1;
 896             }
 897           else
 898             {
 899               if (!strcmp (s, *accepts))
 900                 return 1;
 901             }
 902         }
 903     }
 904   return 0;
 905 }
 906
 907 /* Return the malloc-ed suffix of STR.  For instance:
 908    suffix ("foo.bar")       -> "bar"
 909    suffix ("foo.bar.baz")   -> "baz"
 910    suffix ("/foo/bar")      -> NULL
 911    suffix ("/foo.bar/baz")  -> NULL  */
 912 char *
 913 suffix (const char *str)
 914 {
 915   int i;
 916
 917   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
 918   if (str[i++] == '.')
 919     return xstrdup (str + i);
 920   else
 921     return NULL;
 922 }
 923
 924 /* Read a line from FP.  The function reallocs the storage as needed
 925    to accomodate for any length of the line.  Reallocs are done
 926    storage exponentially, doubling the storage after each overflow to
 927    minimize the number of calls to realloc() and fgets().  The newline
 928    character at the end of line is retained.
 929
 930    After end-of-file is encountered without anything being read, NULL
 931    is returned.  NULL is also returned on error.  To distinguish
 932    between these two cases, use the stdio function ferror().  */
 933
 934 char *
 935 read_whole_line (FILE *fp)
 936 {
 937   int length = 0;
 938   int bufsize = 81;
 939   char *line = (char *)xmalloc (bufsize);
 940
 941   while (fgets (line + length, bufsize - length, fp))
 942     {
 943       length += strlen (line + length);
 944       assert (length > 0);
 945       if (line[length - 1] == '\n')
 946         break;
 947       /* fgets() guarantees to read the whole line, or to use up the
 948          space we've given it.  We can double the buffer
 949          unconditionally.  */
 950       bufsize <<= 1;
 951       line = xrealloc (line, bufsize);
 952     }
 953   if (length == 0 || ferror (fp))
 954     {
 955       xfree (line);
 956       return NULL;
 957     }
 958   if (length + 1 < bufsize)
 959     /* Relieve the memory from our exponential greediness.  We say
 960        `length + 1' because the terminating \0 is not included in
 961        LENGTH.  We don't need to zero-terminate the string ourselves,
 962        though, because fgets() does that.  */
 963     line = xrealloc (line, length + 1);
 964   return line;
 965 }
 966 \f
  967 /* Read FILE into memory.  A pointer to `struct file_memory' is
 968    returned; use struct element `content' to access file contents, and
 969    the element `length' to know the file length.  `content' is *not*
 970    zero-terminated, and you should *not* read or write beyond the [0,
 971    length) range of characters.
 972
 973    After you are done with the file contents, call read_file_free to
 974    release the memory.
 975
 976    Depending on the operating system and the type of file that is
 977    being read, read_file() either mmap's the file into memory, or
 978    reads the file into the core using read().
 979
 980    If file is named "-", fileno(stdin) is used for reading instead.
 981    If you want to read from a real file named "-", use "./-" instead.  */
 982
 983 struct file_memory *
 984 read_file (const char *file)
 985 {
 986   int fd;
 987   struct file_memory *fm;
 988   long size;
 989   int inhibit_close = 0;
 990
 991   /* Some magic in the finest tradition of Perl and its kin: if FILE
 992      is "-", just use stdin.  */
 993   if (HYPHENP (file))
 994     {
 995       fd = fileno (stdin);
 996       inhibit_close = 1;
 997       /* Note that we don't inhibit mmap() in this case.  If stdin is
 998          redirected from a regular file, mmap() will still work.  */
 999     }
1000   else
1001     fd = open (file, O_RDONLY);
1002   if (fd < 0)
1003     return NULL;
1004   fm = xmalloc (sizeof (struct file_memory));
1005
1006 #ifdef HAVE_MMAP
1007   {
1008     struct stat buf;
1009     if (fstat (fd, &buf) < 0)
1010       goto mmap_lose;
1011     fm->length = buf.st_size;
1012     /* NOTE: As far as I know, the callers of this function never
1013        modify the file text.  Relying on this would enable us to
1014        specify PROT_READ and MAP_SHARED for a marginal gain in
1015        efficiency, but at some cost to generality.  */
1016     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1017                         MAP_PRIVATE, fd, 0);
1018     if (fm->content == (char *)MAP_FAILED)
1019       goto mmap_lose;
1020     if (!inhibit_close)
1021       close (fd);
1022
1023     fm->mmap_p = 1;
1024     return fm;
1025   }
1026
1027  mmap_lose:
1028   /* The most common reason why mmap() fails is that FD does not point
1029      to a plain file.  However, it's also possible that mmap() doesn't
1030      work for a particular type of file.  Therefore, whenever mmap()
1031      fails, we just fall back to the regular method.  */
1032 #endif /* HAVE_MMAP */
1033
1034   fm->length = 0;
1035   size = 512;                   /* number of bytes fm->contents can
1036                                    hold at any given time. */
1037   fm->content = xmalloc (size);
1038   while (1)
1039     {
1040       long nread;
1041       if (fm->length > size / 2)
1042         {
1043           /* #### I'm not sure whether the whole exponential-growth
1044              thing makes sense with kernel read.  On Linux at least,
1045              read() refuses to read more than 4K from a file at a
1046              single chunk anyway.  But other Unixes might optimize it
1047              better, and it doesn't *hurt* anything, so I'm leaving
1048              it.  */
1049
1050           /* Normally, we grow SIZE exponentially to make the number
1051              of calls to read() and realloc() logarithmic in relation
1052              to file size.  However, read() can read an amount of data
1053              smaller than requested, and it would be unreasonably to
1054              double SIZE every time *something* was read.  Therefore,
1055              we double SIZE only when the length exceeds half of the
1056              entire allocated size.  */
1057           size <<= 1;
1058           fm->content = xrealloc (fm->content, size);
1059         }
1060       nread = read (fd, fm->content + fm->length, size - fm->length);
1061       if (nread > 0)
1062         /* Successful read. */
1063         fm->length += nread;
1064       else if (nread < 0)
1065         /* Error. */
1066         goto lose;
1067       else
1068         /* EOF */
1069         break;
1070     }
1071   if (!inhibit_close)
1072     close (fd);
1073   if (size > fm->length && fm->length != 0)
1074     /* Due to exponential growth of fm->content, the allocated region
1075        might be much larger than what is actually needed.  */
1076     fm->content = xrealloc (fm->content, fm->length);
1077   fm->mmap_p = 0;
1078   return fm;
1079
1080  lose:
1081   if (!inhibit_close)
1082     close (fd);
1083   xfree (fm->content);
1084   xfree (fm);
1085   return NULL;
1086 }
1087
1088 /* Release the resources held by FM.  Specifically, this calls
1089    munmap() or xfree() on fm->content, depending whether mmap or
1090    malloc/read were used to read in the file.  It also frees the
1091    memory needed to hold the FM structure itself.  */
1092
1093 void
1094 read_file_free (struct file_memory *fm)
1095 {
1096 #ifdef HAVE_MMAP
1097   if (fm->mmap_p)
1098     {
1099       munmap (fm->content, fm->length);
1100     }
1101   else
1102 #endif
1103     {
1104       xfree (fm->content);
1105     }
1106   xfree (fm);
1107 }
1108 \f
/* Release a NULL-terminated vector of pointers: every element is
   freed first, then the vector itself.  A NULL VEC is ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (!vec)
    return;
  for (cursor = vec; *cursor; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1122
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.  V1 is reallocated and V2 is
   freed, so neither pointer may be used after the call; the strings
   themselves are not copied, only the pointers to them.

   If V1 is NULL, V2 is returned unchanged.  If V2 is NULL, V1 is
   returned unchanged.  If V2 is empty, it is freed and V1 is
   returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both vectors plus the terminating NULL.
     The elements are `char *', so that is the size to multiply by.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy V2's elements, including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1151
1152 /* A set of simple-minded routines to store strings in a linked list.
1153    This used to also be used for searching, but now we have hash
1154    tables for that.  */
1155
1156 /* It's a shame that these simple things like linked lists and hash
1157    tables (see hash.c) need to be implemented over and over again.  It
1158    would be nice to be able to use the routines from glib -- see
1159    www.gtk.org for details.  However, that would make Wget depend on
1160    glib, and I want to avoid dependencies to external libraries for
1161    reasons of convenience and portability (I suspect Wget is more
1162    portable than anything ever written for Gnome).  */
1163
1164 /* Append an element to the list.  If the list has a huge number of
1165    elements, this can get slow because it has to find the list's
1166    ending.  If you think you have to call slist_append in a loop,
1167    think about calling slist_prepend() followed by slist_nreverse().  */
1168
1169 slist *
1170 slist_append (slist *l, const char *s)
1171 {
1172   slist *newel = (slist *)xmalloc (sizeof (slist));
1173   slist *beg = l;
1174
1175   newel->string = xstrdup (s);
1176   newel->next = NULL;
1177
1178   if (!l)
1179     return newel;
1180   /* Find the last element.  */
1181   while (l->next)
1182     l = l->next;
1183   l->next = newel;
1184   return beg;
1185 }
1186
1187 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1188
1189 slist *
1190 slist_prepend (slist *l, const char *s)
1191 {
1192   slist *newel = (slist *)xmalloc (sizeof (slist));
1193   newel->string = xstrdup (s);
1194   newel->next = l;
1195   return newel;
1196 }
1197
1198 /* Destructively reverse L. */
1199
1200 slist *
1201 slist_nreverse (slist *l)
1202 {
1203   slist *prev = NULL;
1204   while (l)
1205     {
1206       slist *next = l->next;
1207       l->next = prev;
1208       prev = l;
1209       l = next;
1210     }
1211   return prev;
1212 }
1213
1214 /* Is there a specific entry in the list?  */
1215 int
1216 slist_contains (slist *l, const char *s)
1217 {
1218   for (; l; l = l->next)
1219     if (!strcmp (l->string, s))
1220       return 1;
1221   return 0;
1222 }
1223
1224 /* Free the whole slist.  */
1225 void
1226 slist_free (slist *l)
1227 {
1228   while (l)
1229     {
1230       slist *n = l->next;
1231       xfree (l->string);
1232       xfree (l);
1233       l = n;
1234     }
1235 }
1236 \f
1237 /* Sometimes it's useful to create "sets" of strings, i.e. special
1238    hash tables where you want to store strings as keys and merely
1239    query for their existence.  Here is a set of utility routines that
1240    makes that transparent.  */
1241
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert S when it is not in the set yet.  That way we never
     have to free() an existing key just to strdup() an identical
     replacement.

     The stored value is the constant string "1": it is a clear,
     arbitrary marker that costs no memory, because the same literal
     is shared by all the key-value pairs of all `set' hash tables.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1257
/* Report whether S is a member of the string set HT.  This merely
   forwards to hash_table_contains.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1265
/* Mapper for string_set_free: release one key of the set.  The value
   and the extra argument are not used.  Always returns 0.  */

static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  (void) value_ignored;
  (void) arg_ignored;
  xfree (key);
  return 0;
}

/* Destroy the string set HT: first free all of its keys, then
   destroy the hash table itself.  */

void
string_set_free (struct hash_table *ht)
{
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1279
/* Mapper for free_keys_and_values: release both the key and the
   value of one entry.  The extra argument is not used.  Always
   returns 0.  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  (void) arg_ignored;
  xfree (key);
  xfree (value);
  return 0;
}

/* Free every key and every value stored in HT.  The hash table
   itself is not destroyed here.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1295
1296 \f
/* Worker shared by legible and legible_very_long.  REPR is the
   decimal string form of a number, optionally preceded by `-'.  The
   result is the same text with a `,' inserted between every group of
   three digits, counted from the right.  It is stored in a static
   buffer that is overwritten by the next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *out = outbuf;
  const char *digits = repr;
  int remaining;

  /* Copy a leading minus sign verbatim; it takes no part in the
     grouping.  */
  if (*digits == '-')
    {
      *out++ = '-';
      ++digits;
    }

  /* REMAINING counts the characters not yet copied, including the
     current one.  A separator follows the current character whenever
     the number of characters still to come is a positive multiple of
     three.  */
  for (remaining = strlen (digits); remaining > 0; remaining--)
    {
      *out++ = *digits++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *out++ = ',';
    }

  /* Zero-terminate the string.  */
  *out = '\0';
  return outbuf;
}
1334
/* Return L printed in decimal with thousands separators.  The result
   lives in legible_1's static buffer.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert to plain decimal first, then let legible_1 group it.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1344
1345 /* Write a string representation of NUMBER into the provided buffer.
1346    We cannot use sprintf() because we cannot be sure whether the
1347    platform supports printing of what we chose for VERY_LONG_TYPE.
1348
1349    Example: Gcc supports `long long' under many platforms, but on many
1350    of those the native libc knows nothing of it and therefore cannot
1351    print it.
1352
1353    How long BUFFER needs to be depends on the platform and the content
1354    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1355    bytes are sufficient.  Using more might be a good idea.
1356
1357    This function does not go through the hoops that long_to_string
1358    goes to because it doesn't aspire to be fast.  (It's called perhaps
1359    once in a Wget run.)  */
1360
1361 static void
1362 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1363 {
1364   int i = 0;
1365   int j;
1366
1367   /* Print the number backwards... */
1368   do
1369     {
1370       buffer[i++] = '0' + number % 10;
1371       number /= 10;
1372     }
1373   while (number);
1374
1375   /* ...and reverse the order of the digits. */
1376   for (j = 0; j < i / 2; j++)
1377     {
1378       char c = buffer[j];
1379       buffer[j] = buffer[i - 1 - j];
1380       buffer[i - 1 - j] = c;
1381     }
1382   buffer[i] = '\0';
1383 }
1384
1385 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1386 char *
1387 legible_very_long (VERY_LONG_TYPE l)
1388 {
1389   char inbuf[128];
1390   /* Print the number into the buffer.  */
1391   very_long_to_string (inbuf, l);
1392   return legible_1 (inbuf);
1393 }
1394
/* Return the number of characters needed to print A in decimal: one
   per digit, plus one for the `-' sign when A is negative.  Zero
   counts as one digit.  */
int
numdigit (long a)
{
  unsigned long magnitude;
  int res = 1;

  if (a < 0)
    {
      /* Take the absolute value in unsigned arithmetic.  Negating A
         directly would overflow (undefined behavior) for LONG_MIN.  */
      magnitude = 0UL - (unsigned long) a;
      ++res;
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1409
/* Helper macros for long_to_string.  They operate on its local
   variables P (output cursor) and N (the magnitude still to print).
   ONE_DIGIT stores the leading digit of N, given FIGURE, the power
   of ten that digit stands for; ONE_DIGIT_ADVANCE additionally
   removes that digit from N.  DIGITS_<k> prints exactly K digits.
   The DIGITS_<k> macros expand to several statements, so they must
   only be used inside braces.  */

#define ONE_DIGIT(figure) *p++ = (char) (n / (figure) + '0')
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  The output is the same as that
   of `sprintf(buffer, "%ld", number)', for every value of NUMBER
   including LONG_MIN, only produced much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Work on the magnitude as an unsigned value.  Negating NUMBER
         as a signed long would overflow for LONG_MIN.  */
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1518 \f
/* Support for timers. */

/* Exactly one of TIMER_WINDOWS, TIMER_GETTIMEOFDAY and TIMER_TIME is
   defined below, depending on the OS and on the availability of
   gettimeofday().  Virtually all modern Unix systems end up with
   TIMER_GETTIMEOFDAY; Windows uses TIMER_WINDOWS.  TIMER_TIME is the
   catch-all for non-Windows systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else  /* neither WINDOWS nor HAVE_GETTIMEOFDAY */
# define TIMER_TIME
#endif

/* The moment a timer was last reset, stored in whatever form the
   selected time source delivers.  */

struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  long secs;                    /* tv_sec at reset time */
  long usecs;                   /* tv_usec at reset time */
#elif defined (TIMER_TIME)
  time_t secs;                  /* time() at reset time */
#elif defined (TIMER_WINDOWS)
  ULARGE_INTEGER wintime;       /* FILETIME at reset time */
#endif
};
1559
1560 /* Allocate a timer.  It is not legal to do anything with a freshly
1561    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1562
1563 struct wget_timer *
1564 wtimer_allocate (void)
1565 {
1566   struct wget_timer *wt =
1567     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1568   return wt;
1569 }
1570
/* Create a timer that is ready for use: allocate it, reset it, and
   return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer;

  timer = wtimer_allocate ();
  wtimer_reset (timer);
  return timer;
}
1580
/* Release the memory held by timer WT.  WT must not be used after
   this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1589
/* Record the current time in WT.  This is the starting point from
   which wtimer_elapsed() measures the number of elapsed
   milliseconds.  A timer that was used before may be reset again.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined (TIMER_TIME)
  wt->secs = time (NULL);
#elif defined (TIMER_WINDOWS)
  SYSTEMTIME systime;
  FILETIME filetime;

  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1617
/* Return the number of milliseconds that have passed since WT was
   last reset.  The timer is not modified, so the function may be
   called repeatedly to obtain increasingly higher elapsed values.

   The time source is chosen with the same TIMER_* macros that select
   the layout of struct wget_timer and the code in wtimer_reset().  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval t;
  gettimeofday (&t, NULL);
  /* The microsecond difference may be negative; the sum with the
     second difference is still the correct interval.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#elif defined (TIMER_TIME)
  /* time() only has a resolution of one second.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#elif defined (TIMER_WINDOWS)
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME counts 100-nanosecond intervals, so dividing by 10000
     yields milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1647
/* Return the estimated granularity of the timer implementation, in
   milliseconds.  Code that has to deal with "zero" time intervals
   needs to know this.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_TIME)
  /* time() counts whole seconds, so this one is clear.  */
  return 1000;
#elif defined (TIMER_GETTIMEOFDAY) || defined (TIMER_WINDOWS)
  /* The granularity of gettimeofday is hugely architecture-dependent,
     but on modern machines it appears to be better than 1ms.  For
     Windows the real figure is unknown (?); 1ms is assumed there as
     well.  */
  return 1;
#endif
}
1672 \f
/* This should probably be at a better place, but it doesn't really
   fit into html-parse.c.  */

/* Return a newly malloc-ed copy of string S in which the following
   characters are replaced by numeric or special graphic entities, as
   per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *dst, *res;
  int size = 0;

  /* First pass: compute the length of the quoted string.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':                 /* `&amp;' */
      case ' ':                 /* `&#32;' */
        size += 5;
        break;
      case '<':                 /* `&lt;' */
      case '>':                 /* `&gt;' */
        size += 4;
        break;
      case '\"':                /* `&quot;' */
        size += 6;
        break;
      default:
        size += 1;
      }

  res = (char *)xmalloc (size + 1);

  /* Second pass: copy, expanding the special characters.  */
  dst = res;
  for (src = s; *src; src++)
    {
      const char *entity;

      if (*src == '&')
        entity = "&amp;";
      else if (*src == '<')
        entity = "&lt;";
      else if (*src == '>')
        entity = "&gt;";
      else if (*src == '\"')
        entity = "&quot;";
      else if (*src == ' ')
        entity = "&#32;";
      else
        entity = NULL;

      if (entity == NULL)
        *dst++ = *src;
      else
        while (*entity)
          *dst++ = *entity++;
    }
  *dst = '\0';
  return res;
}
1747
/* Return the width, in columns, of the terminal attached to stderr.
   0 is returned when the width cannot be determined: when the
   TIOCGWINSZ ioctl is not available, when opt.lfilename is set, or
   when the ioctl fails.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#else  /* not TIOCGWINSZ */
  return 0;
#endif /* not TIOCGWINSZ */
}