sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 #include "wget.h"
  63 #include "utils.h"
  64 #include "fnmatch.h"
  65 #include "hash.h"
  66
  67 #ifndef errno
  68 extern int errno;
  69 #endif
  70
  71 /* This section implements several wrappers around the basic
  72    allocation routines.  This is done for two reasons: first, so that
  73    the callers of these functions need not consistently check for
  74    errors.  If there is not enough virtual memory for running Wget,
  75    something is seriously wrong, and Wget exits with an appropriate
  76    error message.
  77
  78    The second reason why these are useful is that, if DEBUG_MALLOC is
  79    defined, they also provide a handy (if crude) malloc debugging
  80    interface that checks memory leaks.  */
  81
  82 /* Croak the fatal memory error and bail out with non-zero exit
  83    status.  */
  84 static void
  85 memfatal (const char *what)
  86 {
  87   /* HACK: expose save_log_p from log.c, so we can turn it off in
  88      order to prevent saving the log.  Saving the log is dangerous
  89      because logprintf() and logputs() can call malloc(), so this
  90      could infloop.  When logging is turned off, infloop can no longer
  91      happen.
  92
  93      #### This is no longer really necessary because the new routines
  94      in log.c cons only if the line exceeds eighty characters.  But
  95      this can come at the end of a line, so it's OK to be careful.
  96
  97      On a more serious note, it would be good to have a
  98      log_forced_shutdown() routine that exposes this cleanly.  */
  99   extern int save_log_p;
 100
 101   save_log_p = 0;
 102   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
 103   exit (1);
 104 }
 105
 106 /* These functions end with _real because they need to be
 107    distinguished from the debugging functions, and from the macros.
 108    Explanation follows:
 109
 110    If memory debugging is not turned on, wget.h defines these:
 111
 112      #define xmalloc xmalloc_real
 113      #define xrealloc xrealloc_real
 114      #define xstrdup xstrdup_real
 115      #define xfree free
 116
 117    In case of memory debugging, the definitions are a bit more
 118    complex, because we want to provide more information, *and* we want
 119    to call the debugging code.  (The former is the reason why xmalloc
 120    and friends need to be macros in the first place.)  Then it looks
 121    like this:
 122
 123      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 124      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 125      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 126      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 127
 128    Each of the *_debug function does its magic and calls the real one.  */
 129
 130 #ifdef DEBUG_MALLOC
 131 # define STATIC_IF_DEBUG static
 132 #else
 133 # define STATIC_IF_DEBUG
 134 #endif
 135
 136 STATIC_IF_DEBUG void *
 137 xmalloc_real (size_t size)
 138 {
 139   void *ptr = malloc (size);
 140   if (!ptr)
 141     memfatal ("malloc");
 142   return ptr;
 143 }
 144
 145 STATIC_IF_DEBUG void *
 146 xrealloc_real (void *ptr, size_t newsize)
 147 {
 148   void *newptr;
 149
 150   /* Not all Un*xes have the feature of realloc() that calling it with
 151      a NULL-pointer is the same as malloc(), but it is easy to
 152      simulate.  */
 153   if (ptr)
 154     newptr = realloc (ptr, newsize);
 155   else
 156     newptr = malloc (newsize);
 157   if (!newptr)
 158     memfatal ("realloc");
 159   return newptr;
 160 }
 161
 162 STATIC_IF_DEBUG char *
 163 xstrdup_real (const char *s)
 164 {
 165   char *copy;
 166
 167 #ifndef HAVE_STRDUP
 168   int l = strlen (s);
 169   copy = malloc (l + 1);
 170   if (!copy)
 171     memfatal ("strdup");
 172   memcpy (copy, s, l + 1);
 173 #else  /* HAVE_STRDUP */
 174   copy = strdup (s);
 175   if (!copy)
 176     memfatal ("strdup");
 177 #endif /* HAVE_STRDUP */
 178
 179   return copy;
 180 }
 181
 182 #ifdef DEBUG_MALLOC
 183
 184 /* Crude home-grown routines for debugging some malloc-related
 185    problems.  Featured:
 186
 187    * Counting the number of malloc and free invocations, and reporting
 188      the "balance", i.e. how many times more malloc was called than it
 189      was the case with free.
 190
 191    * Making malloc store its entry into a simple array and free remove
 192      stuff from that array.  At the end, print the pointers which have
 193      not been freed, along with the source file and the line number.
 194      This also has the side-effect of detecting freeing memory that
 195      was never allocated.
 196
 197    Note that this kind of memory leak checking strongly depends on
 198    every malloc() being followed by a free(), even if the program is
 199    about to finish.  Wget is careful to free the data structure it
 200    allocated in init.c.  */
 201
/* Running totals kept by the *_debug wrappers below: malloc_count is
   bumped for every new allocation they register, free_count for every
   xfree_debug() call.  */
static int malloc_count, free_count;

/* Table of live allocations.  A slot whose `ptr' is NULL is unused;
   otherwise the slot holds the allocated pointer together with the
   source file and line handed to the wrapper that registered it.  */
static struct {
  char *ptr;
  const char *file;
  int line;
} malloc_debug[100000];
 209
 210 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 211    which can be a real problem.  It would be nice to use a hash table
 212    for malloc_debug, but the functions in hash.c are not suitable
 213    because they can call malloc() themselves.  Maybe it would work if
 214    the hash table were preallocated to a huge size, and if we set the
 215    rehash threshold to 1.0.  */
 216
 217 /* Register PTR in malloc_debug.  Abort if this is not possible
 218    (presumably due to the number of current allocations exceeding the
 219    size of malloc_debug.)  */
 220
 221 static void
 222 register_ptr (void *ptr, const char *file, int line)
 223 {
 224   int i;
 225   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 226     if (malloc_debug[i].ptr == NULL)
 227       {
 228         malloc_debug[i].ptr = ptr;
 229         malloc_debug[i].file = file;
 230         malloc_debug[i].line = line;
 231         return;
 232       }
 233   abort ();
 234 }
 235
 236 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 237    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 238
 239 static void
 240 unregister_ptr (void *ptr)
 241 {
 242   int i;
 243   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 244     if (malloc_debug[i].ptr == ptr)
 245       {
 246         malloc_debug[i].ptr = NULL;
 247         return;
 248       }
 249   abort ();
 250 }
 251
 252 /* Print the malloc debug stats that can be gathered from the above
 253    information.  Currently this is the count of mallocs, frees, the
 254    difference between the two, and the dump of the contents of
 255    malloc_debug.  The last part are the memory leaks.  */
 256
 257 void
 258 print_malloc_debug_stats (void)
 259 {
 260   int i;
 261   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 262           malloc_count, free_count, malloc_count - free_count);
 263   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 264     if (malloc_debug[i].ptr != NULL)
 265       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 266               malloc_debug[i].file, malloc_debug[i].line);
 267 }
 268
 269 void *
 270 xmalloc_debug (size_t size, const char *source_file, int source_line)
 271 {
 272   void *ptr = xmalloc_real (size);
 273   ++malloc_count;
 274   register_ptr (ptr, source_file, source_line);
 275   return ptr;
 276 }
 277
 278 void
 279 xfree_debug (void *ptr, const char *source_file, int source_line)
 280 {
 281   assert (ptr != NULL);
 282   ++free_count;
 283   unregister_ptr (ptr);
 284   free (ptr);
 285 }
 286
 287 void *
 288 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 289 {
 290   void *newptr = xrealloc_real (ptr, newsize);
 291   if (!ptr)
 292     {
 293       ++malloc_count;
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   else if (newptr != ptr)
 297     {
 298       unregister_ptr (ptr);
 299       register_ptr (newptr, source_file, source_line);
 300     }
 301   return newptr;
 302 }
 303
 304 char *
 305 xstrdup_debug (const char *s, const char *source_file, int source_line)
 306 {
 307   char *copy = xstrdup_real (s);
 308   ++malloc_count;
 309   register_ptr (copy, source_file, source_line);
 310   return copy;
 311 }
 312
 313 #endif /* DEBUG_MALLOC */
 314 \f
 315 /* Utility function: like xstrdup(), but also lowercases S.  */
 316
 317 char *
 318 xstrdup_lower (const char *s)
 319 {
 320   char *copy = xstrdup (s);
 321   char *p = copy;
 322   for (; *p; p++)
 323     *p = TOLOWER (*p);
 324   return copy;
 325 }
 326
 327 /* Return a count of how many times CHR occurs in STRING. */
 328
 329 int
 330 count_char (const char *string, char chr)
 331 {
 332   const char *p;
 333   int count = 0;
 334   for (p = string; *p; p++)
 335     if (*p == chr)
 336       ++count;
 337   return count;
 338 }
 339
 340 /* Copy the string formed by two pointers (one on the beginning, other
 341    on the char after the last char) to a new, malloc-ed location.
 342    0-terminate it.  */
 343 char *
 344 strdupdelim (const char *beg, const char *end)
 345 {
 346   char *res = (char *)xmalloc (end - beg + 1);
 347   memcpy (res, beg, end - beg);
 348   res[end - beg] = '\0';
 349   return res;
 350 }
 351
 352 /* Parse a string containing comma-separated elements, and return a
 353    vector of char pointers with the elements.  Spaces following the
 354    commas are ignored.  */
 355 char **
 356 sepstring (const char *s)
 357 {
 358   char **res;
 359   const char *p;
 360   int i = 0;
 361
 362   if (!s || !*s)
 363     return NULL;
 364   res = NULL;
 365   p = s;
 366   while (*s)
 367     {
 368       if (*s == ',')
 369         {
 370           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 371           res[i] = strdupdelim (p, s);
 372           res[++i] = NULL;
 373           ++s;
 374           /* Skip the blanks following the ','.  */
 375           while (ISSPACE (*s))
 376             ++s;
 377           p = s;
 378         }
 379       else
 380         ++s;
 381     }
 382   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 383   res[i] = strdupdelim (p, s);
 384   res[i + 1] = NULL;
 385   return res;
 386 }
 387 \f
 388 /* Return pointer to a static char[] buffer in which zero-terminated
 389    string-representation of TM (in form hh:mm:ss) is printed.
 390
 391    If TM is non-NULL, the current time-in-seconds will be stored
 392    there.
 393
 394    (#### This is misleading: one would expect TM would be used instead
 395    of the current time in that case.  This design was probably
 396    influenced by the design time(2), and should be changed at some
 397    points.  No callers use non-NULL TM anyway.)  */
 398
 399 char *
 400 time_str (time_t *tm)
 401 {
 402   static char output[15];
 403   struct tm *ptm;
 404   time_t secs = time (tm);
 405
 406   if (secs == -1)
 407     {
 408       /* In case of error, return the empty string.  Maybe we should
 409          just abort if this happens?  */
 410       *output = '\0';
 411       return output;
 412     }
 413   ptm = localtime (&secs);
 414   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 415   return output;
 416 }
 417
 418 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 419
 420 char *
 421 datetime_str (time_t *tm)
 422 {
 423   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 424   struct tm *ptm;
 425   time_t secs = time (tm);
 426
 427   if (secs == -1)
 428     {
 429       /* In case of error, return the empty string.  Maybe we should
 430          just abort if this happens?  */
 431       *output = '\0';
 432       return output;
 433     }
 434   ptm = localtime (&secs);
 435   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 436            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 437            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 438   return output;
 439 }
 440 \f
 441 /* The Windows versions of the following two functions are defined in
 442    mswindows.c.  */
 443
 444 #ifndef WINDOWS
 445 void
 446 fork_to_background (void)
 447 {
 448   pid_t pid;
 449   /* Whether we arrange our own version of opt.lfilename here.  */
 450   int changedp = 0;
 451
 452   if (!opt.lfilename)
 453     {
 454       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 455       changedp = 1;
 456     }
 457   pid = fork ();
 458   if (pid < 0)
 459     {
 460       /* parent, error */
 461       perror ("fork");
 462       exit (1);
 463     }
 464   else if (pid != 0)
 465     {
 466       /* parent, no error */
 467       printf (_("Continuing in background.\n"));
 468       if (changedp)
 469         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 470       exit (0);
 471     }
 472   /* child: keep running */
 473 }
 474 #endif /* not WINDOWS */
 475 \f
 476 /* Resolve "." and ".." elements of PATH by destructively modifying
 477    PATH.  "." is resolved by removing that path element, and ".." is
 478    resolved by removing the preceding path element.  Leading and
 479    trailing slashes are preserved.
 480
 481    Return non-zero if any changes have been made.
 482
 483    For example, "a/b/c/./../d/.." will yield "a/b/".  More exhaustive
 484    test examples are provided below.  If you change anything in this
 485    function, run test_path_simplify to make sure you haven't broken a
 486    test case.
 487
 488    A previous version of this function was based on path_simplify()
 489    from GNU Bash, but it has been rewritten for Wget 1.8.1.  */
 490
 491 int
 492 path_simplify (char *path)
 493 {
 494   int change = 0;
 495   char *p, *end;
 496
 497   if (path[0] == '/')
 498     ++path;                     /* preserve the leading '/'. */
 499
 500   p = path;
 501   end = p + strlen (p) + 1;     /* position past the terminating zero. */
 502
 503   while (1)
 504     {
 505     again:
 506       /* P should point to the beginning of a path element. */
 507
 508       if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0'))
 509         {
 510           /* Handle "./foo" by moving "foo" two characters to the
 511              left. */
 512           if (*(p + 1) == '/')
 513             {
 514               change = 1;
 515               memmove (p, p + 2, end - p);
 516               end -= 2;
 517               goto again;
 518             }
 519           else
 520             {
 521               change = 1;
 522               *p = '\0';
 523               break;
 524             }
 525         }
 526       else if (*p == '.' && *(p + 1) == '.'
 527                && (*(p + 2) == '/' || *(p + 2) == '\0'))
 528         {
 529           /* Handle "../foo" by moving "foo" one path element to the
 530              left.  */
 531           char *b = p;          /* not p-1 because P can equal PATH */
 532
 533           /* Backtrack by one path element, but not past the beginning
 534              of PATH. */
 535
 536           /* foo/bar/../baz */
 537           /*         ^ p    */
 538           /*     ^ b        */
 539
 540           if (b > path)
 541             {
 542               /* Move backwards until B hits the beginning of the
 543                  previous path element or the beginning of path. */
 544               for (--b; b > path && *(b - 1) != '/'; b--)
 545                 ;
 546             }
 547
 548           change = 1;
 549           if (*(p + 2) == '/')
 550             {
 551               memmove (b, p + 3, end - (p + 3));
 552               end -= (p + 3) - b;
 553               p = b;
 554             }
 555           else
 556             {
 557               *b = '\0';
 558               break;
 559             }
 560
 561           goto again;
 562         }
 563       else if (*p == '/')
 564         {
 565           /* Remove empty path elements.  Not mandated by rfc1808 et
 566              al, but empty path elements are not all that useful, and
 567              the rest of Wget might not deal with them well. */
 568           char *q = p;
 569           while (*q == '/')
 570             ++q;
 571           change = 1;
 572           if (*q == '\0')
 573             {
 574               *p = '\0';
 575               break;
 576             }
 577           memmove (p, q, end - q);
 578           end -= q - p;
 579           goto again;
 580         }
 581
 582       /* Skip to the next path element. */
 583       while (*p && *p != '/')
 584         ++p;
 585       if (*p == '\0')
 586         break;
 587
 588       /* Make sure P points to the beginning of the next path element,
 589          which is location after the slash. */
 590       ++p;
 591     }
 592
 593   return change;
 594 }
 595 \f
 596 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 597    specified with TM.  */
 598 void
 599 touch (const char *file, time_t tm)
 600 {
 601 #ifdef HAVE_STRUCT_UTIMBUF
 602   struct utimbuf times;
 603   times.actime = times.modtime = tm;
 604 #else
 605   time_t times[2];
 606   times[0] = times[1] = tm;
 607 #endif
 608
 609   if (utime (file, &times) == -1)
 610     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 611 }
 612
 613 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 614    nothing under MS-Windows.  */
 615 int
 616 remove_link (const char *file)
 617 {
 618   int err = 0;
 619   struct stat st;
 620
 621   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 622     {
 623       DEBUGP (("Unlinking %s (symlink).\n", file));
 624       err = unlink (file);
 625       if (err != 0)
 626         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 627                    file, strerror (errno));
 628     }
 629   return err;
 630 }
 631
 632 /* Does FILENAME exist?  This is quite a lousy implementation, since
 633    it supplies no error codes -- only a yes-or-no answer.  Thus it
 634    will return that a file does not exist if, e.g., the directory is
 635    unreadable.  I don't mind it too much currently, though.  The
 636    proper way should, of course, be to have a third, error state,
 637    other than true/false, but that would introduce uncalled-for
 638    additional complexity to the callers.  */
 639 int
 640 file_exists_p (const char *filename)
 641 {
 642 #ifdef HAVE_ACCESS
 643   return access (filename, F_OK) >= 0;
 644 #else
 645   struct stat buf;
 646   return stat (filename, &buf) >= 0;
 647 #endif
 648 }
 649
 650 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 651    Returns 0 on error.  */
 652 int
 653 file_non_directory_p (const char *path)
 654 {
 655   struct stat buf;
 656   /* Use lstat() rather than stat() so that symbolic links pointing to
 657      directories can be identified correctly.  */
 658   if (lstat (path, &buf) != 0)
 659     return 0;
 660   return S_ISDIR (buf.st_mode) ? 0 : 1;
 661 }
 662
 663 /* Return a unique filename, given a prefix and count */
 664 static char *
 665 unique_name_1 (const char *fileprefix, int count)
 666 {
 667   char *filename;
 668
 669   if (count)
 670     {
 671       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 672       sprintf (filename, "%s.%d", fileprefix, count);
 673     }
 674   else
 675     filename = xstrdup (fileprefix);
 676
 677   if (!file_exists_p (filename))
 678     return filename;
 679   else
 680     {
 681       xfree (filename);
 682       return NULL;
 683     }
 684 }
 685
 686 /* Return a unique file name, based on PREFIX.  */
 687 char *
 688 unique_name (const char *prefix)
 689 {
 690   char *file = NULL;
 691   int count = 0;
 692
 693   while (!file)
 694     file = unique_name_1 (prefix, count++);
 695   return file;
 696 }
 697 \f
 698 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 699    are missing, create them first.  In case any mkdir() call fails,
 700    return its error status.  Returns 0 on successful completion.
 701
 702    The behaviour of this function should be identical to the behaviour
 703    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 704 int
 705 make_directory (const char *directory)
 706 {
 707   int quit = 0;
 708   int i;
 709   char *dir;
 710
 711   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 712      function is unsafe if called with a read-only char *argument.  */
 713   STRDUP_ALLOCA (dir, directory);
 714
 715   /* If the first character of dir is '/', skip it (and thus enable
 716      creation of absolute-pathname directories.  */
 717   for (i = (*dir == '/'); 1; ++i)
 718     {
 719       for (; dir[i] && dir[i] != '/'; i++)
 720         ;
 721       if (!dir[i])
 722         quit = 1;
 723       dir[i] = '\0';
 724       /* Check whether the directory already exists.  */
 725       if (!file_exists_p (dir))
 726         {
 727           if (mkdir (dir, 0777) < 0)
 728             return -1;
 729         }
 730       if (quit)
 731         break;
 732       else
 733         dir[i] = '/';
 734     }
 735   return 0;
 736 }
 737
 738 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 739    should be a file name.
 740
 741    file_merge("/foo/bar", "baz")  => "/foo/baz"
 742    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 743    file_merge("foo", "bar")       => "bar"
 744
 745    In other words, it's a simpler and gentler version of uri_merge_1.  */
 746
 747 char *
 748 file_merge (const char *base, const char *file)
 749 {
 750   char *result;
 751   const char *cut = (const char *)strrchr (base, '/');
 752
 753   if (!cut)
 754     return xstrdup (file);
 755
 756   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 757   memcpy (result, base, cut - base);
 758   result[cut - base] = '/';
 759   strcpy (result + (cut - base) + 1, file);
 760
 761   return result;
 762 }
 763 \f
 764 static int in_acclist PARAMS ((const char *const *, const char *, int));
 765
 766 /* Determine whether a file is acceptable to be followed, according to
 767    lists of patterns to accept/reject.  */
 768 int
 769 acceptable (const char *s)
 770 {
 771   int l = strlen (s);
 772
 773   while (l && s[l] != '/')
 774     --l;
 775   if (s[l] == '/')
 776     s += (l + 1);
 777   if (opt.accepts)
 778     {
 779       if (opt.rejects)
 780         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 781                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 782       else
 783         return in_acclist ((const char *const *)opt.accepts, s, 1);
 784     }
 785   else if (opt.rejects)
 786     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 787   return 1;
 788 }
 789
 790 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 791    `/something', frontcmp() will return 1 only if S2 begins with
 792    `/something'.  Otherwise, 0 is returned.  */
 793 int
 794 frontcmp (const char *s1, const char *s2)
 795 {
 796   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 797   return !*s1;
 798 }
 799
 800 /* Iterate through STRLIST, and return the first element that matches
 801    S, through wildcards or front comparison (as appropriate).  */
 802 static char *
 803 proclist (char **strlist, const char *s, enum accd flags)
 804 {
 805   char **x;
 806
 807   for (x = strlist; *x; x++)
 808     if (has_wildcards_p (*x))
 809       {
 810         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 811           break;
 812       }
 813     else
 814       {
 815         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 816         if (frontcmp (p, s))
 817           break;
 818       }
 819   return *x;
 820 }
 821
 822 /* Returns whether DIRECTORY is acceptable for download, wrt the
 823    include/exclude lists.
 824
 825    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 826    and absolute paths may be freely intermixed.  */
 827 int
 828 accdir (const char *directory, enum accd flags)
 829 {
 830   /* Remove starting '/'.  */
 831   if (flags & ALLABS && *directory == '/')
 832     ++directory;
 833   if (opt.includes)
 834     {
 835       if (!proclist (opt.includes, directory, flags))
 836         return 0;
 837     }
 838   if (opt.excludes)
 839     {
 840       if (proclist (opt.excludes, directory, flags))
 841         return 0;
 842     }
 843   return 1;
 844 }
 845
 846 /* Match the end of STRING against PATTERN.  For instance:
 847
 848    match_backwards ("abc", "bc") -> 1
 849    match_backwards ("abc", "ab") -> 0
 850    match_backwards ("abc", "abc") -> 1 */
 851 int
 852 match_tail (const char *string, const char *pattern)
 853 {
 854   int i, j;
 855
 856   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 857     if (string[i] != pattern[j])
 858       break;
 859   /* If the pattern was exhausted, the match was succesful.  */
 860   if (j == -1)
 861     return 1;
 862   else
 863     return 0;
 864 }
 865
 866 /* Checks whether string S matches each element of ACCEPTS.  A list
 867    element are matched either with fnmatch() or match_tail(),
 868    according to whether the element contains wildcards or not.
 869
 870    If the BACKWARD is 0, don't do backward comparison -- just compare
 871    them normally.  */
 872 static int
 873 in_acclist (const char *const *accepts, const char *s, int backward)
 874 {
 875   for (; *accepts; accepts++)
 876     {
 877       if (has_wildcards_p (*accepts))
 878         {
 879           /* fnmatch returns 0 if the pattern *does* match the
 880              string.  */
 881           if (fnmatch (*accepts, s, 0) == 0)
 882             return 1;
 883         }
 884       else
 885         {
 886           if (backward)
 887             {
 888               if (match_tail (s, *accepts))
 889                 return 1;
 890             }
 891           else
 892             {
 893               if (!strcmp (s, *accepts))
 894                 return 1;
 895             }
 896         }
 897     }
 898   return 0;
 899 }
 900
 901 /* Return the location of STR's suffix (file extension).  Examples:
 902    suffix ("foo.bar")       -> "bar"
 903    suffix ("foo.bar.baz")   -> "baz"
 904    suffix ("/foo/bar")      -> NULL
 905    suffix ("/foo.bar/baz")  -> NULL  */
 906 char *
 907 suffix (const char *str)
 908 {
 909   int i;
 910
 911   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 912     ;
 913
 914   if (str[i++] == '.')
 915     return (char *)str + i;
 916   else
 917     return NULL;
 918 }
 919
 920 /* Read a line from FP.  The function reallocs the storage as needed
 921    to accomodate for any length of the line.  Reallocs are done
 922    exponentially, doubling the storage after each overflow to minimize
 923    the number of calls to realloc() and fgets().  The newline
 924    character at the end of line is retained.
 925
 926    After end-of-file is encountered without anything being read, NULL
 927    is returned.  NULL is also returned on error.  To distinguish
 928    between these two cases, use the stdio function ferror().
 929
 930    A future version of this function will be rewritten to use fread()
 931    instead of fgets(), and to return the length of the line, which
 932    will make the function usable on files with binary content.  */
 933
 934 char *
 935 read_whole_line (FILE *fp)
 936 {
 937   int length = 0;
 938   int bufsize = 81;
 939   char *line = (char *)xmalloc (bufsize);
 940
 941   while (fgets (line + length, bufsize - length, fp))
 942     {
 943       length += strlen (line + length);
 944       if (length == 0)
 945         /* Possible for example when reading from a binary file where
 946            a line begins with \0.  */
 947         continue;
 948
 949       if (line[length - 1] == '\n')
 950         break;
 951
 952       /* fgets() guarantees to read the whole line, or to use up the
 953          space we've given it.  We can double the buffer
 954          unconditionally.  */
 955       bufsize <<= 1;
 956       line = xrealloc (line, bufsize);
 957     }
 958   if (length == 0 || ferror (fp))
 959     {
 960       xfree (line);
 961       return NULL;
 962     }
 963   if (length + 1 < bufsize)
 964     /* Relieve the memory from our exponential greediness.  We say
 965        `length + 1' because the terminating \0 is not included in
 966        LENGTH.  We don't need to zero-terminate the string ourselves,
 967        though, because fgets() does that.  */
 968     line = xrealloc (line, length + 1);
 969   return line;
 970 }
 971 \f
  972 /* Read FILE into memory.  A pointer to `struct file_memory' is
  973    returned; use struct element `content' to access file contents, and
 974    the element `length' to know the file length.  `content' is *not*
 975    zero-terminated, and you should *not* read or write beyond the [0,
 976    length) range of characters.
 977
 978    After you are done with the file contents, call read_file_free to
 979    release the memory.
 980
 981    Depending on the operating system and the type of file that is
 982    being read, read_file() either mmap's the file into memory, or
 983    reads the file into the core using read().
 984
 985    If file is named "-", fileno(stdin) is used for reading instead.
 986    If you want to read from a real file named "-", use "./-" instead.  */
 987
 988 struct file_memory *
 989 read_file (const char *file)
 990 {
 991   int fd;
 992   struct file_memory *fm;
 993   long size;
 994   int inhibit_close = 0;
 995
 996   /* Some magic in the finest tradition of Perl and its kin: if FILE
 997      is "-", just use stdin.  */
 998   if (HYPHENP (file))
 999     {
1000       fd = fileno (stdin);
1001       inhibit_close = 1;
1002       /* Note that we don't inhibit mmap() in this case.  If stdin is
1003          redirected from a regular file, mmap() will still work.  */
1004     }
1005   else
1006     fd = open (file, O_RDONLY);
1007   if (fd < 0)
1008     return NULL;
1009   fm = xmalloc (sizeof (struct file_memory));
1010
1011 #ifdef HAVE_MMAP
1012   {
1013     struct stat buf;
1014     if (fstat (fd, &buf) < 0)
1015       goto mmap_lose;
1016     fm->length = buf.st_size;
1017     /* NOTE: As far as I know, the callers of this function never
1018        modify the file text.  Relying on this would enable us to
1019        specify PROT_READ and MAP_SHARED for a marginal gain in
1020        efficiency, but at some cost to generality.  */
1021     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1022                         MAP_PRIVATE, fd, 0);
1023     if (fm->content == (char *)MAP_FAILED)
1024       goto mmap_lose;
1025     if (!inhibit_close)
1026       close (fd);
1027
1028     fm->mmap_p = 1;
1029     return fm;
1030   }
1031
1032  mmap_lose:
1033   /* The most common reason why mmap() fails is that FD does not point
1034      to a plain file.  However, it's also possible that mmap() doesn't
1035      work for a particular type of file.  Therefore, whenever mmap()
1036      fails, we just fall back to the regular method.  */
1037 #endif /* HAVE_MMAP */
1038
1039   fm->length = 0;
1040   size = 512;                   /* number of bytes fm->contents can
1041                                    hold at any given time. */
1042   fm->content = xmalloc (size);
1043   while (1)
1044     {
1045       long nread;
1046       if (fm->length > size / 2)
1047         {
1048           /* #### I'm not sure whether the whole exponential-growth
1049              thing makes sense with kernel read.  On Linux at least,
1050              read() refuses to read more than 4K from a file at a
1051              single chunk anyway.  But other Unixes might optimize it
1052              better, and it doesn't *hurt* anything, so I'm leaving
1053              it.  */
1054
1055           /* Normally, we grow SIZE exponentially to make the number
1056              of calls to read() and realloc() logarithmic in relation
1057              to file size.  However, read() can read an amount of data
1058              smaller than requested, and it would be unreasonably to
1059              double SIZE every time *something* was read.  Therefore,
1060              we double SIZE only when the length exceeds half of the
1061              entire allocated size.  */
1062           size <<= 1;
1063           fm->content = xrealloc (fm->content, size);
1064         }
1065       nread = read (fd, fm->content + fm->length, size - fm->length);
1066       if (nread > 0)
1067         /* Successful read. */
1068         fm->length += nread;
1069       else if (nread < 0)
1070         /* Error. */
1071         goto lose;
1072       else
1073         /* EOF */
1074         break;
1075     }
1076   if (!inhibit_close)
1077     close (fd);
1078   if (size > fm->length && fm->length != 0)
1079     /* Due to exponential growth of fm->content, the allocated region
1080        might be much larger than what is actually needed.  */
1081     fm->content = xrealloc (fm->content, fm->length);
1082   fm->mmap_p = 0;
1083   return fm;
1084
1085  lose:
1086   if (!inhibit_close)
1087     close (fd);
1088   xfree (fm->content);
1089   xfree (fm);
1090   return NULL;
1091 }
1092
1093 /* Release the resources held by FM.  Specifically, this calls
1094    munmap() or xfree() on fm->content, depending whether mmap or
1095    malloc/read were used to read in the file.  It also frees the
1096    memory needed to hold the FM structure itself.  */
1097
1098 void
1099 read_file_free (struct file_memory *fm)
1100 {
1101 #ifdef HAVE_MMAP
1102   if (fm->mmap_p)
1103     {
1104       munmap (fm->content, fm->length);
1105     }
1106   else
1107 #endif
1108     {
1109       xfree (fm->content);
1110     }
1111   xfree (fm);
1112 }
1113 \f
/* Release a NULL-terminated vector of pointers: every element is
   freed first, followed by the vector itself.  A NULL VEC is
   accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cur;

  if (vec == NULL)
    return;
  for (cur = vec; *cur != NULL; cur++)
    xfree (*cur);
  xfree (vec);
}
1127
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the result.  V1 is reallocated and V2 itself is
   freed (its elements now belong to the result), so neither of the
   original pointers may be used after the call.  If V1 is NULL, V2
   is returned as is; if V2 is NULL, V1 is returned as is.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: there is nothing to append, so just dispose of
         it and avoid a pointless reallocation.  */
      xfree (v2);
      return v1;
    }
  /* Count the elements of v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count the elements of v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Make room for both vectors plus the terminating NULL.  The
     elements are `char *', so that is the size to multiply by.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy v2's elements, including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1156
1157 /* A set of simple-minded routines to store strings in a linked list.
1158    This used to also be used for searching, but now we have hash
1159    tables for that.  */
1160
1161 /* It's a shame that these simple things like linked lists and hash
1162    tables (see hash.c) need to be implemented over and over again.  It
1163    would be nice to be able to use the routines from glib -- see
1164    www.gtk.org for details.  However, that would make Wget depend on
1165    glib, and I want to avoid dependencies to external libraries for
1166    reasons of convenience and portability (I suspect Wget is more
1167    portable than anything ever written for Gnome).  */
1168
1169 /* Append an element to the list.  If the list has a huge number of
1170    elements, this can get slow because it has to find the list's
1171    ending.  If you think you have to call slist_append in a loop,
1172    think about calling slist_prepend() followed by slist_nreverse().  */
1173
1174 slist *
1175 slist_append (slist *l, const char *s)
1176 {
1177   slist *newel = (slist *)xmalloc (sizeof (slist));
1178   slist *beg = l;
1179
1180   newel->string = xstrdup (s);
1181   newel->next = NULL;
1182
1183   if (!l)
1184     return newel;
1185   /* Find the last element.  */
1186   while (l->next)
1187     l = l->next;
1188   l->next = newel;
1189   return beg;
1190 }
1191
1192 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1193
1194 slist *
1195 slist_prepend (slist *l, const char *s)
1196 {
1197   slist *newel = (slist *)xmalloc (sizeof (slist));
1198   newel->string = xstrdup (s);
1199   newel->next = l;
1200   return newel;
1201 }
1202
1203 /* Destructively reverse L. */
1204
1205 slist *
1206 slist_nreverse (slist *l)
1207 {
1208   slist *prev = NULL;
1209   while (l)
1210     {
1211       slist *next = l->next;
1212       l->next = prev;
1213       prev = l;
1214       l = next;
1215     }
1216   return prev;
1217 }
1218
1219 /* Is there a specific entry in the list?  */
1220 int
1221 slist_contains (slist *l, const char *s)
1222 {
1223   for (; l; l = l->next)
1224     if (!strcmp (l->string, s))
1225       return 1;
1226   return 0;
1227 }
1228
1229 /* Free the whole slist.  */
1230 void
1231 slist_free (slist *l)
1232 {
1233   while (l)
1234     {
1235       slist *n = l->next;
1236       xfree (l->string);
1237       xfree (l);
1238       l = n;
1239     }
1240 }
1241 \f
1242 /* Sometimes it's useful to create "sets" of strings, i.e. special
1243    hash tables where you want to store strings as keys and merely
1244    query for their existence.  Here is a set of utility routines that
1245    makes that transparent.  */
1246
/* Add S to the string set HT.  A private copy of S is stored as the
   key, so the caller keeps ownership of S.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not already a member; that way we never
     have to free() an old key just to strdup() an identical one.

     The value is the string literal "1": a clear arbitrary value
     that costs no memory, because all key-value pairs of all `set'
     hash tables point to that same literal.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1262
/* Tell whether S is a member of the string set HT.  This merely
   forwards to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}
1270
/* Mapper used by string_set_free: release the KEY of one set entry.
   The value and the extra argument are not looked at.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  /* Only keys are heap-allocated (string_set_add stores an xstrdup'ed
     copy); the value is a shared string literal.  */
  xfree (key);
  /* NOTE(review): presumably 0 tells hash_table_map to keep going --
     confirm against hash.c.  */
  return 0;
}
1277
1278 void
1279 string_set_free (struct hash_table *ht)
1280 {
1281   hash_table_map (ht, string_set_free_mapper, NULL);
1282   hash_table_destroy (ht);
1283 }
1284
/* Mapper used by free_keys_and_values: release both the KEY and the
   VALUE of one hash table entry.  The extra argument is not used.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  /* NOTE(review): presumably 0 tells hash_table_map to keep going --
     confirm against hash.c.  */
  return 0;
}
1292
1293 /* Another utility function: call free() on all keys and values of HT.  */
1294
1295 void
1296 free_keys_and_values (struct hash_table *ht)
1297 {
1298   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1299 }
1300
1301 \f
/* Workhorse shared by legible and legible_very_long.  REPR is the
   decimal representation of a number, optionally preceded by `-'.
   The result is the same number with a `,' inserted between each
   group of three digits (counted from the right), e.g. "1234567"
   becomes "1,234,567".

   The result lives in a static buffer that is overwritten by the
   next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *src = repr;
  size_t remaining;

  /* A leading minus sign is copied verbatim and does not take part
     in the grouping.  */
  if (*src == '-')
    *dst++ = *src++;

  /* Copy the digits, counting down how many are still to come.  A
     separator belongs after the current digit whenever the number of
     digits remaining after it is a non-zero multiple of three.  */
  for (remaining = strlen (src); remaining > 0; remaining--)
    {
      *dst++ = *src++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *dst++ = ',';
    }

  *dst = '\0';
  return outbuf;
}
1339
/* Return L printed legibly, i.e. with thousands separators.  The
   result is the static buffer of legible_1 and is overwritten by the
   next call to legible or legible_very_long.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert to plain decimal first, then let legible_1 group it.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1349
1350 /* Write a string representation of NUMBER into the provided buffer.
1351    We cannot use sprintf() because we cannot be sure whether the
1352    platform supports printing of what we chose for VERY_LONG_TYPE.
1353
1354    Example: Gcc supports `long long' under many platforms, but on many
1355    of those the native libc knows nothing of it and therefore cannot
1356    print it.
1357
1358    How long BUFFER needs to be depends on the platform and the content
1359    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1360    bytes are sufficient.  Using more might be a good idea.
1361
1362    This function does not go through the hoops that long_to_string
1363    goes to because it doesn't aspire to be fast.  (It's called perhaps
1364    once in a Wget run.)  */
1365
1366 static void
1367 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1368 {
1369   int i = 0;
1370   int j;
1371
1372   /* Print the number backwards... */
1373   do
1374     {
1375       buffer[i++] = '0' + number % 10;
1376       number /= 10;
1377     }
1378   while (number);
1379
1380   /* ...and reverse the order of the digits. */
1381   for (j = 0; j < i / 2; j++)
1382     {
1383       char c = buffer[j];
1384       buffer[j] = buffer[i - 1 - j];
1385       buffer[i - 1 - j] = c;
1386     }
1387   buffer[i] = '\0';
1388 }
1389
1390 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1391 char *
1392 legible_very_long (VERY_LONG_TYPE l)
1393 {
1394   char inbuf[128];
1395   /* Print the number into the buffer.  */
1396   very_long_to_string (inbuf, l);
1397   return legible_1 (inbuf);
1398 }
1399
/* Return the number of characters needed to print the (long) integer
   A in base 10: one per digit, plus one for the `-' sign if A is
   negative.  Zero counts as one digit.  */
int
numdigit (long a)
{
  int res = 1;
  unsigned long magnitude;

  if (a < 0)
    {
      /* Negate in unsigned arithmetic: `-a' would overflow for
         LONG_MIN.  */
      magnitude = 0UL - (unsigned long) a;
      ++res;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1414
/* Helper macros for long_to_string.  They operate on the local
   variables `p' (output cursor) and `n' (remaining magnitude).
   ONE_DIGIT emits the leading digit of N, given that FIGURE is the
   power of ten matching that digit; ONE_DIGIT_ADVANCE also strips
   that digit off N.  DIGITS_<k> emits a K-digit number.  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is completely equivalent
   to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs, 24 bytes are always enough.
   That covers the worst-case digits, the optional `-' sign, and the
   trailing \0.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  /* The magnitude is kept in an unsigned variable because `-number'
     would overflow for LONG_MIN, whereas the unsigned negation is
     well-defined and yields the correct absolute value.  */
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Dispatch on the number of digits and emit them most significant
     first, fully unrolled.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1523 \f
/* Support for timers. */

/* Start from a clean slate so that exactly one backend gets selected
   below, regardless of what was defined before this point.  */
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Depending on the OS and availability of gettimeofday(), one and
   only one of the above constants will be defined.  Virtually all
   modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
   use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
   non-Windows systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#ifdef WINDOWS
# define TIMER_WINDOWS
#else  /* not WINDOWS */
# ifdef HAVE_GETTIMEOFDAY
#  define TIMER_GETTIMEOFDAY
# else
#  define TIMER_TIME
# endif
#endif /* not WINDOWS */

/* The starting point recorded by wtimer_reset.  Its layout depends on
   the timer backend selected above.  */
struct wget_timer {
#ifdef TIMER_GETTIMEOFDAY
  /* tv_sec and tv_usec as reported by gettimeofday().  */
  long secs;
  long usecs;
#endif

#ifdef TIMER_TIME
  /* Value returned by time().  */
  time_t secs;
#endif

#ifdef TIMER_WINDOWS
  /* System time converted to a FILETIME and stored as a 64-bit
     quantity.  */
  ULARGE_INTEGER wintime;
#endif
};
1564
1565 /* Allocate a timer.  It is not legal to do anything with a freshly
1566    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1567
1568 struct wget_timer *
1569 wtimer_allocate (void)
1570 {
1571   struct wget_timer *wt =
1572     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1573   return wt;
1574 }
1575
/* Convenience constructor: allocate a timer with wtimer_allocate,
   start it with wtimer_reset, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer;

  timer = wtimer_allocate ();
  wtimer_reset (timer);
  return timer;
}
1585
/* Release the memory occupied by timer WT.  The timer must not be
   used in any way afterwards.  */

void
wtimer_delete (struct wget_timer *wt)
{
  /* A timer owns nothing besides its own storage.  */
  xfree (wt);
}
1594
/* (Re)start timer WT by recording the current time in it.  This is
   the reference point against which wtimer_elapsed() measures the
   number of elapsed milliseconds.  Resetting a timer that has
   already been used is allowed.  */

void
wtimer_reset (struct wget_timer *wt)
{
  /* Exactly one TIMER_* backend is defined, so exactly one of the
     branches below is compiled.  */
#if defined TIMER_GETTIMEOFDAY
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined TIMER_TIME
  wt->secs = time (NULL);
#elif defined TIMER_WINDOWS
  FILETIME file_time;
  SYSTEMTIME system_time;

  GetSystemTime (&system_time);
  SystemTimeToFileTime (&system_time, &file_time);
  wt->wintime.HighPart = file_time.dwHighDateTime;
  wt->wintime.LowPart  = file_time.dwLowDateTime;
#endif
}
1622
/* Return the number of milliseconds that have elapsed since timer WT
   was last reset with wtimer_reset().  The function may be called
   repeatedly on the same timer to obtain increasingly higher elapsed
   values.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
  /* Exactly one TIMER_* backend is defined, so exactly one of the
     branches below is compiled.  The Windows branch is keyed on
     TIMER_WINDOWS, like the rest of the timer code, rather than on
     WINDOWS directly.  */
#if defined TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#elif defined TIMER_TIME
  /* time() only has a resolution of one second.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#elif defined TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME counts 100-nanosecond intervals; 10000 of those make a
     millisecond.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1652
/* Return the assumed granularity, in milliseconds, of the timer
   backend in use.  Code that has to cope with "zero" time intervals
   needs to know this.  */

long
wtimer_granularity (void)
{
#if defined TIMER_GETTIMEOFDAY
  /* The granularity of gettimeofday varies hugely between
     architectures, but on modern machines it appears to be better
     than 1ms.  */
  return 1;
#elif defined TIMER_TIME
  /* time() counts whole seconds.  */
  return 1000;
#elif defined TIMER_WINDOWS
  /* ? */
  return 1;
#endif
}
1677 \f
1678 /* This should probably be at a better place, but it doesn't really
1679    fit into html-parse.c.  */
1680
/* Return a newly malloc-ed copy of the string S in which the
   following characters are replaced by HTML entities, as per
   RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  The caller owns the
   returned string.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *out, *res;
  int needed = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':
      case ' ':
        needed += 5;            /* `&amp;' and `&#32;' */
        break;
      case '<':
      case '>':
        needed += 4;            /* `&lt;' and `&gt;' */
        break;
      case '\"':
        needed += 6;            /* `&quot;' */
        break;
      default:
        needed += 1;
      }

  res = (char *)xmalloc (needed + 1);

  /* Second pass: produce the output.  */
  out = res;
  for (src = s; *src; src++)
    {
      const char *entity;

      switch (*src)
        {
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        case '\"':
          entity = "&quot;";
          break;
        case ' ':
          entity = "&#32;";
          break;
        default:
          entity = NULL;
        }

      if (entity != NULL)
        {
          size_t len = strlen (entity);
          memcpy (out, entity, len);
          out += len;
        }
      else
        *out++ = *src;
    }
  *out = '\0';
  return res;
}
1752
/* Return the width, in columns, of the terminal attached to stderr.
   0 is returned when the width cannot be determined: on systems
   without TIOCGWINSZ, when opt.lfilename is set, or when the ioctl
   fails.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize ws;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &ws) < 0)
    return 0;                   /* most likely ENOTTY */

  return ws.ws_col;
#else  /* not TIOCGWINSZ */
  return 0;
#endif /* not TIOCGWINSZ */
}
1777
1778 #if 0
1779 /* A debugging function for checking whether an MD5 library works. */
1780
1781 #include "gen-md5.h"
1782
1783 char *
1784 debug_test_md5 (char *buf)
1785 {
1786   unsigned char raw[16];
1787   static char res[33];
1788   unsigned char *p1;
1789   char *p2;
1790   int cnt;
1791   ALLOCA_MD5_CONTEXT (ctx);
1792
1793   gen_md5_init (ctx);
1794   gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1795   gen_md5_finish (ctx, raw);
1796
1797   p1 = raw;
1798   p2 = res;
1799   cnt = 16;
1800   while (cnt--)
1801     {
1802       *p2++ = XDIGIT_TO_xchar (*p1 >> 4);
1803       *p2++ = XDIGIT_TO_xchar (*p1 & 0xf);
1804       ++p1;
1805     }
1806   *p2 = '\0';
1807
1808   return res;
1809 }
1810 #endif
1811
1812 #if 0
1813 /* Debugging and testing support for path_simplify. */
1814
/* Debug: apply path_simplify to a copy of PATH and return that copy,
   which the caller owns.  PATH itself is left alone.  Handy for
   calling from the debugger.  */
static char *
ps (char *path)
{
  char *simplified = xstrdup (path);

  path_simplify (simplified);
  return simplified;
}
1824
/* Run path_simplify on a copy of TEST and print a diagnostic if the
   outcome differs from EXPECTED_RESULT, or if the value returned by
   path_simplify (whether the path was modified) differs from
   EXPECTED_CHANGE.  Nothing is printed when the test passes.  */
static void
run_test (char *test, char *expected_result, int expected_change)
{
  char *test_copy = xstrdup (test);
  int modified = path_simplify (test_copy);

  if (0 != strcmp (test_copy, expected_result))
    {
      printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
              test, expected_result, test_copy);
    }
  if (modified != expected_change)
    {
      /* Report what was expected: a modification if EXPECTED_CHANGE
         is 1, no modification otherwise.  */
      if (expected_change == 1)
        printf ("Expected modification with path_simplify(\"%s\").\n",
                test);
      else
        printf ("Expected no modification with path_simplify(\"%s\").\n",
                test);
    }
  xfree (test_copy);
}
1847
/* Feed a table of sample paths through run_test: first as written,
   then once more with a slash prepended to both the input and the
   expected result.  */
static void
test_path_simplify (void)
{
  static struct {
    char *test, *result;
    int should_modify;
  } tests[] = {
    { "",               "",             0 },
    { ".",              "",             1 },
    { "..",             "",             1 },
    { "foo",            "foo",          0 },
    { "foo/bar",        "foo/bar",      0 },
    { "foo///bar",      "foo/bar",      1 },
    { "foo/.",          "foo/",         1 },
    { "foo/./",         "foo/",         1 },
    { "foo./",          "foo./",        0 },
    { "foo/../bar",     "bar",          1 },
    { "foo/../bar/",    "bar/",         1 },
    { "foo/bar/..",     "foo/",         1 },
    { "foo/bar/../x",   "foo/x",        1 },
    { "foo/bar/../x/",  "foo/x/",       1 },
    { "foo/..",         "",             1 },
    { "foo/../..",      "",             1 },
    { "a/b/../../c",    "c",            1 },
    { "./a/../b",       "b",            1 }
  };
  const int count = ARRAY_SIZE (tests);
  int idx;

  /* First pass: the test cases exactly as listed.  */
  for (idx = 0; idx < count; idx++)
    run_test (tests[idx].test, tests[idx].result, tests[idx].should_modify);

  /* Second pass: the same cases with a leading slash, to prove that
     the slash is being preserved.  */
  for (idx = 0; idx < count; idx++)
    {
      char *slashed_test = xmalloc (1 + strlen (tests[idx].test) + 1);
      char *slashed_result;

      slashed_test[0] = '/';
      strcpy (slashed_test + 1, tests[idx].test);

      slashed_result = xmalloc (1 + strlen (tests[idx].result) + 1);
      slashed_result[0] = '/';
      strcpy (slashed_result + 1, tests[idx].result);

      run_test (slashed_test, slashed_result, tests[idx].should_modify);

      xfree (slashed_test);
      xfree (slashed_result);
    }
}
1903 #endif