sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53 #ifdef HAVE_SYS_IOCTL_H
  54 # include <sys/ioctl.h>
  55 #endif
  56
  57 #include "wget.h"
  58 #include "utils.h"
  59 #include "fnmatch.h"
  60 #include "hash.h"
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* This section implements several wrappers around the basic
  67    allocation routines.  This is done for two reasons: first, so that
  68    the callers of these functions need not consistently check for
  69    errors.  If there is not enough virtual memory for running Wget,
  70    something is seriously wrong, and Wget exits with an appropriate
  71    error message.
  72
  73    The second reason why these are useful is that, if DEBUG_MALLOC is
  74    defined, they also provide a handy (if crude) malloc debugging
  75    interface that checks memory leaks.  */
  76
  77 /* Croak the fatal memory error and bail out with non-zero exit
  78    status.  */
  79 static void
  80 memfatal (const char *what)
  81 {
  82   /* HACK: expose save_log_p from log.c, so we can turn it off in
  83      order to prevent saving the log.  Saving the log is dangerous
  84      because logprintf() and logputs() can call malloc(), so this
  85      could infloop.  When logging is turned off, infloop can no longer
  86      happen.
  87
  88      #### This is no longer really necessary because the new routines
  89      in log.c cons only if the line exceeds eighty characters.  But
  90      this can come at the end of a line, so it's OK to be careful.
  91
  92      On a more serious note, it would be good to have a
  93      log_forced_shutdown() routine that exposes this cleanly.  */
  94   extern int save_log_p;
  95
  96   save_log_p = 0;
  97   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  98   exit (1);
  99 }
 100
 101 /* These functions end with _real because they need to be
 102    distinguished from the debugging functions, and from the macros.
 103    Explanation follows:
 104
 105    If memory debugging is not turned on, wget.h defines these:
 106
 107      #define xmalloc xmalloc_real
 108      #define xrealloc xrealloc_real
 109      #define xstrdup xstrdup_real
 110      #define xfree free
 111
 112    In case of memory debugging, the definitions are a bit more
 113    complex, because we want to provide more information, *and* we want
 114    to call the debugging code.  (The former is the reason why xmalloc
 115    and friends need to be macros in the first place.)  Then it looks
 116    like this:
 117
 118      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 119      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 120      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 121      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 122
 123    Each of the *_debug function does its magic and calls the real one.  */
 124
/* STATIC_IF_DEBUG expands to `static' when DEBUG_MALLOC is defined
   and to nothing otherwise.  It is applied to the *_real allocation
   functions below, so with memory debugging enabled they become
   file-local and are reached only through the *_debug wrappers.  */
#ifdef DEBUG_MALLOC
# define STATIC_IF_DEBUG static
#else
# define STATIC_IF_DEBUG
#endif
 130
 131 STATIC_IF_DEBUG void *
 132 xmalloc_real (size_t size)
 133 {
 134   void *ptr = malloc (size);
 135   if (!ptr)
 136     memfatal ("malloc");
 137   return ptr;
 138 }
 139
 140 STATIC_IF_DEBUG void *
 141 xrealloc_real (void *ptr, size_t newsize)
 142 {
 143   void *newptr;
 144
 145   /* Not all Un*xes have the feature of realloc() that calling it with
 146      a NULL-pointer is the same as malloc(), but it is easy to
 147      simulate.  */
 148   if (ptr)
 149     newptr = realloc (ptr, newsize);
 150   else
 151     newptr = malloc (newsize);
 152   if (!newptr)
 153     memfatal ("realloc");
 154   return newptr;
 155 }
 156
 157 STATIC_IF_DEBUG char *
 158 xstrdup_real (const char *s)
 159 {
 160   char *copy;
 161
 162 #ifndef HAVE_STRDUP
 163   int l = strlen (s);
 164   copy = malloc (l + 1);
 165   if (!copy)
 166     memfatal ("strdup");
 167   memcpy (copy, s, l + 1);
 168 #else  /* HAVE_STRDUP */
 169   copy = strdup (s);
 170   if (!copy)
 171     memfatal ("strdup");
 172 #endif /* HAVE_STRDUP */
 173
 174   return copy;
 175 }
 176
 177 #ifdef DEBUG_MALLOC
 178
 179 /* Crude home-grown routines for debugging some malloc-related
 180    problems.  Featured:
 181
 182    * Counting the number of malloc and free invocations, and reporting
 183      the "balance", i.e. how many times more malloc was called than it
 184      was the case with free.
 185
 186    * Making malloc store its entry into a simple array and free remove
 187      stuff from that array.  At the end, print the pointers which have
 188      not been freed, along with the source file and the line number.
 189      This also has the side-effect of detecting freeing memory that
 190      was never allocated.
 191
 192    Note that this kind of memory leak checking strongly depends on
 193    every malloc() being followed by a free(), even if the program is
 194    about to finish.  Wget is careful to free the data structure it
 195    allocated in init.c.  */
 196
/* Running totals of allocations and frees made through the *_debug
   wrappers; print_malloc_debug_stats() reports them and their
   difference.  */
static int malloc_count, free_count;

/* Fixed-size table of live allocations.  An entry whose `ptr' is NULL
   is free; otherwise `file' and `line' record where the allocation
   was requested.  register_ptr() aborts when no free entry is left.  */
static struct {
  char *ptr;
  const char *file;
  int line;
} malloc_debug[100000];
 204
 205 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 206    which can be a real problem.  It would be nice to use a hash table
 207    for malloc_debug, but the functions in hash.c are not suitable
 208    because they can call malloc() themselves.  Maybe it would work if
 209    the hash table were preallocated to a huge size, and if we set the
 210    rehash threshold to 1.0.  */
 211
 212 /* Register PTR in malloc_debug.  Abort if this is not possible
 213    (presumably due to the number of current allocations exceeding the
 214    size of malloc_debug.)  */
 215
 216 static void
 217 register_ptr (void *ptr, const char *file, int line)
 218 {
 219   int i;
 220   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 221     if (malloc_debug[i].ptr == NULL)
 222       {
 223         malloc_debug[i].ptr = ptr;
 224         malloc_debug[i].file = file;
 225         malloc_debug[i].line = line;
 226         return;
 227       }
 228   abort ();
 229 }
 230
 231 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 232    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 233
 234 static void
 235 unregister_ptr (void *ptr)
 236 {
 237   int i;
 238   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 239     if (malloc_debug[i].ptr == ptr)
 240       {
 241         malloc_debug[i].ptr = NULL;
 242         return;
 243       }
 244   abort ();
 245 }
 246
 247 /* Print the malloc debug stats that can be gathered from the above
 248    information.  Currently this is the count of mallocs, frees, the
 249    difference between the two, and the dump of the contents of
 250    malloc_debug.  The last part are the memory leaks.  */
 251
 252 void
 253 print_malloc_debug_stats (void)
 254 {
 255   int i;
 256   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 257           malloc_count, free_count, malloc_count - free_count);
 258   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 259     if (malloc_debug[i].ptr != NULL)
 260       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 261               malloc_debug[i].file, malloc_debug[i].line);
 262 }
 263
 264 void *
 265 xmalloc_debug (size_t size, const char *source_file, int source_line)
 266 {
 267   void *ptr = xmalloc_real (size);
 268   ++malloc_count;
 269   register_ptr (ptr, source_file, source_line);
 270   return ptr;
 271 }
 272
 273 void
 274 xfree_debug (void *ptr, const char *source_file, int source_line)
 275 {
 276   assert (ptr != NULL);
 277   ++free_count;
 278   unregister_ptr (ptr);
 279   free (ptr);
 280 }
 281
 282 void *
 283 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 284 {
 285   void *newptr = xrealloc_real (ptr, newsize);
 286   if (!ptr)
 287     {
 288       ++malloc_count;
 289       register_ptr (newptr, source_file, source_line);
 290     }
 291   else if (newptr != ptr)
 292     {
 293       unregister_ptr (ptr);
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   return newptr;
 297 }
 298
 299 char *
 300 xstrdup_debug (const char *s, const char *source_file, int source_line)
 301 {
 302   char *copy = xstrdup_real (s);
 303   ++malloc_count;
 304   register_ptr (copy, source_file, source_line);
 305   return copy;
 306 }
 307
 308 #endif /* DEBUG_MALLOC */
 309 \f
 310 /* Utility function: like xstrdup(), but also lowercases S.  */
 311
 312 char *
 313 xstrdup_lower (const char *s)
 314 {
 315   char *copy = xstrdup (s);
 316   char *p = copy;
 317   for (; *p; p++)
 318     *p = TOLOWER (*p);
 319   return copy;
 320 }
 321
 322 /* Return a count of how many times CHR occurs in STRING. */
 323
 324 int
 325 count_char (const char *string, char chr)
 326 {
 327   const char *p;
 328   int count = 0;
 329   for (p = string; *p; p++)
 330     if (*p == chr)
 331       ++count;
 332   return count;
 333 }
 334
 335 /* Copy the string formed by two pointers (one on the beginning, other
 336    on the char after the last char) to a new, malloc-ed location.
 337    0-terminate it.  */
 338 char *
 339 strdupdelim (const char *beg, const char *end)
 340 {
 341   char *res = (char *)xmalloc (end - beg + 1);
 342   memcpy (res, beg, end - beg);
 343   res[end - beg] = '\0';
 344   return res;
 345 }
 346
 347 /* Parse a string containing comma-separated elements, and return a
 348    vector of char pointers with the elements.  Spaces following the
 349    commas are ignored.  */
 350 char **
 351 sepstring (const char *s)
 352 {
 353   char **res;
 354   const char *p;
 355   int i = 0;
 356
 357   if (!s || !*s)
 358     return NULL;
 359   res = NULL;
 360   p = s;
 361   while (*s)
 362     {
 363       if (*s == ',')
 364         {
 365           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 366           res[i] = strdupdelim (p, s);
 367           res[++i] = NULL;
 368           ++s;
 369           /* Skip the blanks following the ','.  */
 370           while (ISSPACE (*s))
 371             ++s;
 372           p = s;
 373         }
 374       else
 375         ++s;
 376     }
 377   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 378   res[i] = strdupdelim (p, s);
 379   res[i + 1] = NULL;
 380   return res;
 381 }
 382 \f
 383 /* Return pointer to a static char[] buffer in which zero-terminated
 384    string-representation of TM (in form hh:mm:ss) is printed.
 385
 386    If TM is non-NULL, the current time-in-seconds will be stored
 387    there.
 388
 389    (#### This is misleading: one would expect TM would be used instead
 390    of the current time in that case.  This design was probably
 391    influenced by the design time(2), and should be changed at some
 392    points.  No callers use non-NULL TM anyway.)  */
 393
 394 char *
 395 time_str (time_t *tm)
 396 {
 397   static char output[15];
 398   struct tm *ptm;
 399   time_t secs = time (tm);
 400
 401   if (secs == -1)
 402     {
 403       /* In case of error, return the empty string.  Maybe we should
 404          just abort if this happens?  */
 405       *output = '\0';
 406       return output;
 407     }
 408   ptm = localtime (&secs);
 409   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 410   return output;
 411 }
 412
 413 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 414
 415 char *
 416 datetime_str (time_t *tm)
 417 {
 418   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 419   struct tm *ptm;
 420   time_t secs = time (tm);
 421
 422   if (secs == -1)
 423     {
 424       /* In case of error, return the empty string.  Maybe we should
 425          just abort if this happens?  */
 426       *output = '\0';
 427       return output;
 428     }
 429   ptm = localtime (&secs);
 430   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 431            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 432            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 433   return output;
 434 }
 435 \f
 436 /* The Windows versions of the following two functions are defined in
 437    mswindows.c.  */
 438
 439 #ifndef WINDOWS
 440 void
 441 fork_to_background (void)
 442 {
 443   pid_t pid;
 444   /* Whether we arrange our own version of opt.lfilename here.  */
 445   int changedp = 0;
 446
 447   if (!opt.lfilename)
 448     {
 449       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 450       changedp = 1;
 451     }
 452   pid = fork ();
 453   if (pid < 0)
 454     {
 455       /* parent, error */
 456       perror ("fork");
 457       exit (1);
 458     }
 459   else if (pid != 0)
 460     {
 461       /* parent, no error */
 462       printf (_("Continuing in background.\n"));
 463       if (changedp)
 464         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 465       exit (0);
 466     }
 467   /* child: keep running */
 468 }
 469 #endif /* not WINDOWS */
 470 \f
#if 0
/* debug */
/* Debugging helper, compiled out by the surrounding `#if 0'.  Returns
   a freshly allocated copy of ORIG that has been run through
   path_simplify(); ORIG itself is not modified.  The return value of
   path_simplify() is ignored.  */
char *
ps (char *orig)
{
  char *r = xstrdup (orig);
  path_simplify (r);
  return r;
}
#endif
 481
 482 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 483    in that:
        Multiple `/'s are collapsed to a single `/'.
 485         Leading `./'s and trailing `/.'s are removed.
 486         Trailing `/'s are removed.
 487         Non-leading `../'s and trailing `..'s are handled by removing
 488         portions of the path.
 489
 490    E.g. "a/b/c/./../d/.." will yield "a/b/".  This function originates
 491    from GNU Bash and has been mutilated to unrecognition for use in
 492    Wget.
 493
 494    Changes for Wget:
 495         Always use '/' as stub_char.
 496         Don't check for local things using canon_stat.
 497         Change the original string instead of strdup-ing.
 498         React correctly when beginning with `./' and `../'.
 499         Don't zip out trailing slashes.
 500         Return a value indicating whether any modifications took place.
 501
 502    If you dare change this function, take a careful look at the test
 503    cases below, and make sure that they pass.  */
 504
 505 int
 506 path_simplify (char *path)
 507 {
 508   register int i, start;
 509   int changes = 0;
 510   char stub_char;
 511
 512   if (!*path)
 513     return 0;
 514
 515   stub_char = '/';
 516
 517   if (path[0] == '/')
 518     /* Preserve initial '/'. */
 519     ++path;
 520
 521   /* Nix out leading `.' or `..' with.  */
 522   if ((path[0] == '.' && path[1] == '\0')
 523       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 524     {
 525       path[0] = '\0';
 526       changes = 1;
 527       return changes;
 528     }
 529
 530   /* Walk along PATH looking for things to compact.  */
 531   i = 0;
 532   while (1)
 533     {
 534       if (!path[i])
 535         break;
 536
 537       while (path[i] && path[i] != '/')
 538         i++;
 539
 540       start = i++;
 541
 542       /* If we didn't find any slashes, then there is nothing left to do.  */
 543       if (!path[start])
 544         break;
 545
 546       /* Handle multiple `/'s in a row.  */
 547       while (path[i] == '/')
 548         i++;
 549
 550       if ((start + 1) != i)
 551         {
 552           strcpy (path + start + 1, path + i);
 553           i = start + 1;
 554           changes = 1;
 555         }
 556
 557       /* Check for `../', `./' or trailing `.' by itself.  */
 558       if (path[i] == '.')
 559         {
 560           /* Handle trailing `.' by itself.  */
 561           if (!path[i + 1])
 562             {
 563               path[--i] = '\0';
 564               changes = 1;
 565               break;
 566             }
 567
 568           /* Handle `./'.  */
 569           if (path[i + 1] == '/')
 570             {
 571               strcpy (path + i, path + i + 1);
 572               i = (start < 0) ? 0 : start;
 573               changes = 1;
 574               continue;
 575             }
 576
 577           /* Handle `../' or trailing `..' by itself.  */
 578           if (path[i + 1] == '.' &&
 579               (path[i + 2] == '/' || !path[i + 2]))
 580             {
 581               while (--start > -1 && path[start] != '/');
 582               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 583               i = (start < 0) ? 0 : start;
 584               changes = 1;
 585               continue;
 586             }
 587         }       /* path == '.' */
 588     } /* while */
 589
 590   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 591   i = 0;
 592   while (1)
 593     {
 594       if (path[i] == '.' && path[i + 1] == '/')
 595         i += 2;
 596       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 597         i += 3;
 598       else
 599         break;
 600     }
 601   if (i)
 602     {
 603       strcpy (path, path + i - 0);
 604       changes = 1;
 605     }
 606
 607   return changes;
 608 }
 609
 610 /* Test cases:
 611    ps("")                   -> ""
 612    ps("/")                  -> "/"
 613    ps(".")                  -> ""
 614    ps("..")                 -> ""
 615    ps("/.")                 -> "/"
 616    ps("/..")                -> "/"
 617    ps("foo")                -> "foo"
 618    ps("foo/bar")            -> "foo/bar"
 619    ps("foo//bar")           -> "foo/bar"             (possibly a bug)
 620    ps("foo/../bar")         -> "bar"
 621    ps("foo/bar/..")         -> "foo/"
 622    ps("foo/bar/../x")       -> "foo/x"
 623    ps("foo/bar/../x/")      -> "foo/x/"
 624    ps("foo/..")             -> ""
 625    ps("/foo/..")            -> "/"
 626    ps("a/b/../../c")        -> "c"
 627    ps("/a/b/../../c")       -> "/c"
 628    ps("./a/../b")           -> "b"
 629    ps("/./a/../b")          -> "/b"
 630 */
 631 \f
 632 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 633    specified with TM.  */
 634 void
 635 touch (const char *file, time_t tm)
 636 {
 637 #ifdef HAVE_STRUCT_UTIMBUF
 638   struct utimbuf times;
 639   times.actime = times.modtime = tm;
 640 #else
 641   time_t times[2];
 642   times[0] = times[1] = tm;
 643 #endif
 644
 645   if (utime (file, &times) == -1)
 646     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 647 }
 648
 649 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 650    nothing under MS-Windows.  */
 651 int
 652 remove_link (const char *file)
 653 {
 654   int err = 0;
 655   struct stat st;
 656
 657   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 658     {
 659       DEBUGP (("Unlinking %s (symlink).\n", file));
 660       err = unlink (file);
 661       if (err != 0)
 662         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 663                    file, strerror (errno));
 664     }
 665   return err;
 666 }
 667
 668 /* Does FILENAME exist?  This is quite a lousy implementation, since
 669    it supplies no error codes -- only a yes-or-no answer.  Thus it
 670    will return that a file does not exist if, e.g., the directory is
 671    unreadable.  I don't mind it too much currently, though.  The
 672    proper way should, of course, be to have a third, error state,
 673    other than true/false, but that would introduce uncalled-for
 674    additional complexity to the callers.  */
 675 int
 676 file_exists_p (const char *filename)
 677 {
 678 #ifdef HAVE_ACCESS
 679   return access (filename, F_OK) >= 0;
 680 #else
 681   struct stat buf;
 682   return stat (filename, &buf) >= 0;
 683 #endif
 684 }
 685
 686 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 687    Returns 0 on error.  */
 688 int
 689 file_non_directory_p (const char *path)
 690 {
 691   struct stat buf;
 692   /* Use lstat() rather than stat() so that symbolic links pointing to
 693      directories can be identified correctly.  */
 694   if (lstat (path, &buf) != 0)
 695     return 0;
 696   return S_ISDIR (buf.st_mode) ? 0 : 1;
 697 }
 698
 699 /* Return a unique filename, given a prefix and count */
 700 static char *
 701 unique_name_1 (const char *fileprefix, int count)
 702 {
 703   char *filename;
 704
 705   if (count)
 706     {
 707       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 708       sprintf (filename, "%s.%d", fileprefix, count);
 709     }
 710   else
 711     filename = xstrdup (fileprefix);
 712
 713   if (!file_exists_p (filename))
 714     return filename;
 715   else
 716     {
 717       xfree (filename);
 718       return NULL;
 719     }
 720 }
 721
 722 /* Return a unique file name, based on PREFIX.  */
 723 char *
 724 unique_name (const char *prefix)
 725 {
 726   char *file = NULL;
 727   int count = 0;
 728
 729   while (!file)
 730     file = unique_name_1 (prefix, count++);
 731   return file;
 732 }
 733 \f
 734 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 735    are missing, create them first.  In case any mkdir() call fails,
 736    return its error status.  Returns 0 on successful completion.
 737
 738    The behaviour of this function should be identical to the behaviour
 739    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 740 int
 741 make_directory (const char *directory)
 742 {
 743   int quit = 0;
 744   int i;
 745   char *dir;
 746
 747   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 748      function is unsafe if called with a read-only char *argument.  */
 749   STRDUP_ALLOCA (dir, directory);
 750
 751   /* If the first character of dir is '/', skip it (and thus enable
 752      creation of absolute-pathname directories.  */
 753   for (i = (*dir == '/'); 1; ++i)
 754     {
 755       for (; dir[i] && dir[i] != '/'; i++)
 756         ;
 757       if (!dir[i])
 758         quit = 1;
 759       dir[i] = '\0';
 760       /* Check whether the directory already exists.  */
 761       if (!file_exists_p (dir))
 762         {
 763           if (mkdir (dir, 0777) < 0)
 764             return -1;
 765         }
 766       if (quit)
 767         break;
 768       else
 769         dir[i] = '/';
 770     }
 771   return 0;
 772 }
 773
 774 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 775    should be a file name.
 776
 777    file_merge("/foo/bar", "baz")  => "/foo/baz"
 778    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 779    file_merge("foo", "bar")       => "bar"
 780
 781    In other words, it's a simpler and gentler version of uri_merge_1.  */
 782
 783 char *
 784 file_merge (const char *base, const char *file)
 785 {
 786   char *result;
 787   const char *cut = (const char *)strrchr (base, '/');
 788
 789   if (!cut)
 790     return xstrdup (file);
 791
 792   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 793   memcpy (result, base, cut - base);
 794   result[cut - base] = '/';
 795   strcpy (result + (cut - base) + 1, file);
 796
 797   return result;
 798 }
 799 \f
/* Forward declaration: in_acclist() is defined further down but is
   already needed by acceptable().  */
static int in_acclist PARAMS ((const char *const *, const char *, int));
 801
 802 /* Determine whether a file is acceptable to be followed, according to
 803    lists of patterns to accept/reject.  */
 804 int
 805 acceptable (const char *s)
 806 {
 807   int l = strlen (s);
 808
 809   while (l && s[l] != '/')
 810     --l;
 811   if (s[l] == '/')
 812     s += (l + 1);
 813   if (opt.accepts)
 814     {
 815       if (opt.rejects)
 816         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 817                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 818       else
 819         return in_acclist ((const char *const *)opt.accepts, s, 1);
 820     }
 821   else if (opt.rejects)
 822     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 823   return 1;
 824 }
 825
 826 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 827    `/something', frontcmp() will return 1 only if S2 begins with
 828    `/something'.  Otherwise, 0 is returned.  */
 829 int
 830 frontcmp (const char *s1, const char *s2)
 831 {
 832   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 833   return !*s1;
 834 }
 835
 836 /* Iterate through STRLIST, and return the first element that matches
 837    S, through wildcards or front comparison (as appropriate).  */
 838 static char *
 839 proclist (char **strlist, const char *s, enum accd flags)
 840 {
 841   char **x;
 842
 843   for (x = strlist; *x; x++)
 844     if (has_wildcards_p (*x))
 845       {
 846         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 847           break;
 848       }
 849     else
 850       {
 851         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 852         if (frontcmp (p, s))
 853           break;
 854       }
 855   return *x;
 856 }
 857
 858 /* Returns whether DIRECTORY is acceptable for download, wrt the
 859    include/exclude lists.
 860
 861    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 862    and absolute paths may be freely intermixed.  */
 863 int
 864 accdir (const char *directory, enum accd flags)
 865 {
 866   /* Remove starting '/'.  */
 867   if (flags & ALLABS && *directory == '/')
 868     ++directory;
 869   if (opt.includes)
 870     {
 871       if (!proclist (opt.includes, directory, flags))
 872         return 0;
 873     }
 874   if (opt.excludes)
 875     {
 876       if (proclist (opt.excludes, directory, flags))
 877         return 0;
 878     }
 879   return 1;
 880 }
 881
 882 /* Match the end of STRING against PATTERN.  For instance:
 883
   match_tail ("abc", "bc") -> 1
   match_tail ("abc", "ab") -> 0
   match_tail ("abc", "abc") -> 1 */
 887 int
 888 match_tail (const char *string, const char *pattern)
 889 {
 890   int i, j;
 891
 892   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 893     if (string[i] != pattern[j])
 894       break;
 895   /* If the pattern was exhausted, the match was succesful.  */
 896   if (j == -1)
 897     return 1;
 898   else
 899     return 0;
 900 }
 901
/* Checks whether string S matches any element of ACCEPTS.  A list
   element is matched either with fnmatch() or match_tail(),
 904    according to whether the element contains wildcards or not.
 905
 906    If the BACKWARD is 0, don't do backward comparison -- just compare
 907    them normally.  */
 908 static int
 909 in_acclist (const char *const *accepts, const char *s, int backward)
 910 {
 911   for (; *accepts; accepts++)
 912     {
 913       if (has_wildcards_p (*accepts))
 914         {
 915           /* fnmatch returns 0 if the pattern *does* match the
 916              string.  */
 917           if (fnmatch (*accepts, s, 0) == 0)
 918             return 1;
 919         }
 920       else
 921         {
 922           if (backward)
 923             {
 924               if (match_tail (s, *accepts))
 925                 return 1;
 926             }
 927           else
 928             {
 929               if (!strcmp (s, *accepts))
 930                 return 1;
 931             }
 932         }
 933     }
 934   return 0;
 935 }
 936
 937 /* Return the location of STR's suffix (file extension).  Examples:
 938    suffix ("foo.bar")       -> "bar"
 939    suffix ("foo.bar.baz")   -> "baz"
 940    suffix ("/foo/bar")      -> NULL
 941    suffix ("/foo.bar/baz")  -> NULL  */
 942 char *
 943 suffix (const char *str)
 944 {
 945   int i;
 946
 947   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 948     ;
 949
 950   if (str[i++] == '.')
 951     return (char *)str + i;
 952   else
 953     return NULL;
 954 }
 955
 956 /* Read a line from FP.  The function reallocs the storage as needed
   to accommodate any length of the line.  Reallocs are done
 958    exponentially, doubling the storage after each overflow to minimize
 959    the number of calls to realloc() and fgets().  The newline
 960    character at the end of line is retained.
 961
 962    After end-of-file is encountered without anything being read, NULL
 963    is returned.  NULL is also returned on error.  To distinguish
 964    between these two cases, use the stdio function ferror().
 965
 966    A future version of this function will be rewritten to use fread()
 967    instead of fgets(), and to return the length of the line, which
 968    will make the function usable on files with binary content.  */
 969
 970 char *
 971 read_whole_line (FILE *fp)
 972 {
 973   int length = 0;
 974   int bufsize = 81;
 975   char *line = (char *)xmalloc (bufsize);
 976
 977   while (fgets (line + length, bufsize - length, fp))
 978     {
 979       length += strlen (line + length);
 980       if (length == 0)
 981         /* Possible for example when reading from a binary file where
 982            a line begins with \0.  */
 983         continue;
 984
 985       if (line[length - 1] == '\n')
 986         break;
 987
 988       /* fgets() guarantees to read the whole line, or to use up the
 989          space we've given it.  We can double the buffer
 990          unconditionally.  */
 991       bufsize <<= 1;
 992       line = xrealloc (line, bufsize);
 993     }
 994   if (length == 0 || ferror (fp))
 995     {
 996       xfree (line);
 997       return NULL;
 998     }
 999   if (length + 1 < bufsize)
1000     /* Relieve the memory from our exponential greediness.  We say
1001        `length + 1' because the terminating \0 is not included in
1002        LENGTH.  We don't need to zero-terminate the string ourselves,
1003        though, because fgets() does that.  */
1004     line = xrealloc (line, length + 1);
1005   return line;
1006 }
1007 \f
/* Read FILE into memory.  A pointer to `struct file_memory' is
1009    returned; use struct element `content' to access file contents, and
1010    the element `length' to know the file length.  `content' is *not*
1011    zero-terminated, and you should *not* read or write beyond the [0,
1012    length) range of characters.
1013
1014    After you are done with the file contents, call read_file_free to
1015    release the memory.
1016
1017    Depending on the operating system and the type of file that is
1018    being read, read_file() either mmap's the file into memory, or
1019    reads the file into the core using read().
1020
1021    If file is named "-", fileno(stdin) is used for reading instead.
1022    If you want to read from a real file named "-", use "./-" instead.  */
1023
1024 struct file_memory *
1025 read_file (const char *file)
1026 {
1027   int fd;
1028   struct file_memory *fm;
1029   long size;
1030   int inhibit_close = 0;
1031
1032   /* Some magic in the finest tradition of Perl and its kin: if FILE
1033      is "-", just use stdin.  */
1034   if (HYPHENP (file))
1035     {
1036       fd = fileno (stdin);
1037       inhibit_close = 1;
1038       /* Note that we don't inhibit mmap() in this case.  If stdin is
1039          redirected from a regular file, mmap() will still work.  */
1040     }
1041   else
1042     fd = open (file, O_RDONLY);
1043   if (fd < 0)
1044     return NULL;
1045   fm = xmalloc (sizeof (struct file_memory));
1046
1047 #ifdef HAVE_MMAP
1048   {
1049     struct stat buf;
1050     if (fstat (fd, &buf) < 0)
1051       goto mmap_lose;
1052     fm->length = buf.st_size;
1053     /* NOTE: As far as I know, the callers of this function never
1054        modify the file text.  Relying on this would enable us to
1055        specify PROT_READ and MAP_SHARED for a marginal gain in
1056        efficiency, but at some cost to generality.  */
1057     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1058                         MAP_PRIVATE, fd, 0);
1059     if (fm->content == (char *)MAP_FAILED)
1060       goto mmap_lose;
1061     if (!inhibit_close)
1062       close (fd);
1063
1064     fm->mmap_p = 1;
1065     return fm;
1066   }
1067
1068  mmap_lose:
1069   /* The most common reason why mmap() fails is that FD does not point
1070      to a plain file.  However, it's also possible that mmap() doesn't
1071      work for a particular type of file.  Therefore, whenever mmap()
1072      fails, we just fall back to the regular method.  */
1073 #endif /* HAVE_MMAP */
1074
1075   fm->length = 0;
1076   size = 512;                   /* number of bytes fm->contents can
1077                                    hold at any given time. */
1078   fm->content = xmalloc (size);
1079   while (1)
1080     {
1081       long nread;
1082       if (fm->length > size / 2)
1083         {
1084           /* #### I'm not sure whether the whole exponential-growth
1085              thing makes sense with kernel read.  On Linux at least,
1086              read() refuses to read more than 4K from a file at a
1087              single chunk anyway.  But other Unixes might optimize it
1088              better, and it doesn't *hurt* anything, so I'm leaving
1089              it.  */
1090
1091           /* Normally, we grow SIZE exponentially to make the number
1092              of calls to read() and realloc() logarithmic in relation
1093              to file size.  However, read() can read an amount of data
1094              smaller than requested, and it would be unreasonably to
1095              double SIZE every time *something* was read.  Therefore,
1096              we double SIZE only when the length exceeds half of the
1097              entire allocated size.  */
1098           size <<= 1;
1099           fm->content = xrealloc (fm->content, size);
1100         }
1101       nread = read (fd, fm->content + fm->length, size - fm->length);
1102       if (nread > 0)
1103         /* Successful read. */
1104         fm->length += nread;
1105       else if (nread < 0)
1106         /* Error. */
1107         goto lose;
1108       else
1109         /* EOF */
1110         break;
1111     }
1112   if (!inhibit_close)
1113     close (fd);
1114   if (size > fm->length && fm->length != 0)
1115     /* Due to exponential growth of fm->content, the allocated region
1116        might be much larger than what is actually needed.  */
1117     fm->content = xrealloc (fm->content, fm->length);
1118   fm->mmap_p = 0;
1119   return fm;
1120
1121  lose:
1122   if (!inhibit_close)
1123     close (fd);
1124   xfree (fm->content);
1125   xfree (fm);
1126   return NULL;
1127 }
1128
1129 /* Release the resources held by FM.  Specifically, this calls
1130    munmap() or xfree() on fm->content, depending whether mmap or
1131    malloc/read were used to read in the file.  It also frees the
1132    memory needed to hold the FM structure itself.  */
1133
1134 void
1135 read_file_free (struct file_memory *fm)
1136 {
1137 #ifdef HAVE_MMAP
1138   if (fm->mmap_p)
1139     {
1140       munmap (fm->content, fm->length);
1141     }
1142   else
1143 #endif
1144     {
1145       xfree (fm->content);
1146     }
1147   xfree (fm);
1148 }
1149 \f
/* Release a NULL-terminated vector of pointers: every element is
   freed first, then the vector itself.  A NULL VEC is silently
   ignored.  */
void
free_vec (char **vec)
{
  int i;

  if (!vec)
    return;
  for (i = 0; vec[i]; i++)
    xfree (vec[i]);
  xfree (vec);
}
1163
/* Append vector V2 to vector V1.  Both are NULL-terminated vectors
   of pointers.  V1 is reallocated and the V2 array (but not the
   strings it points to, which now belong to the result) is freed, so
   neither pointer may be used after the call; use the return value
   instead.

   If V1 is NULL, V2 is returned unchanged; if V2 is NULL, V1 is
   returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to append, so skip the reallocation and
         just get rid of the empty vector.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The elements are `char *', so that is the size to use.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy V2's elements, including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1192
1193 /* A set of simple-minded routines to store strings in a linked list.
1194    This used to also be used for searching, but now we have hash
1195    tables for that.  */
1196
1197 /* It's a shame that these simple things like linked lists and hash
1198    tables (see hash.c) need to be implemented over and over again.  It
1199    would be nice to be able to use the routines from glib -- see
1200    www.gtk.org for details.  However, that would make Wget depend on
1201    glib, and I want to avoid dependencies to external libraries for
1202    reasons of convenience and portability (I suspect Wget is more
1203    portable than anything ever written for Gnome).  */
1204
1205 /* Append an element to the list.  If the list has a huge number of
1206    elements, this can get slow because it has to find the list's
1207    ending.  If you think you have to call slist_append in a loop,
1208    think about calling slist_prepend() followed by slist_nreverse().  */
1209
1210 slist *
1211 slist_append (slist *l, const char *s)
1212 {
1213   slist *newel = (slist *)xmalloc (sizeof (slist));
1214   slist *beg = l;
1215
1216   newel->string = xstrdup (s);
1217   newel->next = NULL;
1218
1219   if (!l)
1220     return newel;
1221   /* Find the last element.  */
1222   while (l->next)
1223     l = l->next;
1224   l->next = newel;
1225   return beg;
1226 }
1227
1228 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1229
1230 slist *
1231 slist_prepend (slist *l, const char *s)
1232 {
1233   slist *newel = (slist *)xmalloc (sizeof (slist));
1234   newel->string = xstrdup (s);
1235   newel->next = l;
1236   return newel;
1237 }
1238
1239 /* Destructively reverse L. */
1240
1241 slist *
1242 slist_nreverse (slist *l)
1243 {
1244   slist *prev = NULL;
1245   while (l)
1246     {
1247       slist *next = l->next;
1248       l->next = prev;
1249       prev = l;
1250       l = next;
1251     }
1252   return prev;
1253 }
1254
1255 /* Is there a specific entry in the list?  */
1256 int
1257 slist_contains (slist *l, const char *s)
1258 {
1259   for (; l; l = l->next)
1260     if (!strcmp (l->string, s))
1261       return 1;
1262   return 0;
1263 }
1264
1265 /* Free the whole slist.  */
1266 void
1267 slist_free (slist *l)
1268 {
1269   while (l)
1270     {
1271       slist *n = l->next;
1272       xfree (l->string);
1273       xfree (l);
1274       l = n;
1275     }
1276 }
1277 \f
1278 /* Sometimes it's useful to create "sets" of strings, i.e. special
1279    hash tables where you want to store strings as keys and merely
1280    query for their existence.  Here is a set of utility routines that
1281    makes that transparent.  */
1282
/* Add string S to the set HT.  The set stores its own copy of S.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not a member yet.  That way an existing
     key never has to be freed and replaced by a new strdup'ed
     copy.

     The value is always the literal "1": a clear arbitrary marker
     that costs no memory, since all pairs in all `set' hash tables
     point to the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1298
/* Membership test for string sets: simply forwards to
   hash_table_contains() and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1306
/* hash_table_map() callback used by string_set_free(): frees KEY and
   ignores the other two arguments.  Always returns 0.  */
static int
string_set_free_mapper (void *key, void *value_unused, void *arg_unused)
{
  xfree (key);
  return 0;
}

/* Destroy the string set HT: first free every key (the copies made
   by string_set_add), then destroy the table itself.  */
void
string_set_free (struct hash_table *ht)
{
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1320
/* hash_table_map() callback used by free_keys_and_values(): frees
   both KEY and VALUE, ignoring the third argument.  Always returns
   0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_unused)
{
  xfree (key);
  xfree (value);
  return 0;
}

/* Free all keys and all values stored in HT.  The table itself is
   not destroyed here.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1336
1337 \f
/* Common worker behind legible and legible_very_long.  REPR is the
   textual form of a number, optionally starting with `-'.  The
   result is the same text with a `,' inserted between every group
   of three characters, counted from the right.  It lives in a static
   buffer that is overwritten by the next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *src = repr;
  size_t remaining;

  /* A leading minus sign is copied verbatim and takes no part in the
     grouping.  */
  if (*src == '-')
    *dst++ = *src++;

  /* Copy one character at a time.  REMAINING counts the characters
     not yet copied, the current one included; whenever what is left
     after the current character is a non-empty multiple of three, a
     separator follows.  */
  for (remaining = strlen (src); remaining > 0; remaining--)
    {
      *dst++ = *src++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *dst++ = ',';
    }

  *dst = '\0';
  return outbuf;
}
1375
/* Return L printed in decimal with thousands separators.  The
   result is a pointer to legible_1's static buffer.  */
char *
legible (long l)
{
  char digits[24];

  /* Produce the plain decimal text, then let legible_1 group it.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1385
1386 /* Write a string representation of NUMBER into the provided buffer.
1387    We cannot use sprintf() because we cannot be sure whether the
1388    platform supports printing of what we chose for VERY_LONG_TYPE.
1389
1390    Example: Gcc supports `long long' under many platforms, but on many
1391    of those the native libc knows nothing of it and therefore cannot
1392    print it.
1393
1394    How long BUFFER needs to be depends on the platform and the content
1395    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1396    bytes are sufficient.  Using more might be a good idea.
1397
1398    This function does not go through the hoops that long_to_string
1399    goes to because it doesn't aspire to be fast.  (It's called perhaps
1400    once in a Wget run.)  */
1401
1402 static void
1403 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1404 {
1405   int i = 0;
1406   int j;
1407
1408   /* Print the number backwards... */
1409   do
1410     {
1411       buffer[i++] = '0' + number % 10;
1412       number /= 10;
1413     }
1414   while (number);
1415
1416   /* ...and reverse the order of the digits. */
1417   for (j = 0; j < i / 2; j++)
1418     {
1419       char c = buffer[j];
1420       buffer[j] = buffer[i - 1 - j];
1421       buffer[i - 1 - j] = c;
1422     }
1423   buffer[i] = '\0';
1424 }
1425
1426 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1427 char *
1428 legible_very_long (VERY_LONG_TYPE l)
1429 {
1430   char inbuf[128];
1431   /* Print the number into the buffer.  */
1432   very_long_to_string (inbuf, l);
1433   return legible_1 (inbuf);
1434 }
1435
/* Count the characters needed to print the (long) integer A in
   decimal: one per digit, plus one for the `-' sign when A is
   negative.  */
int
numdigit (long a)
{
  int res = 1;
  unsigned long magnitude;

  if (a < 0)
    {
      /* Negate in unsigned arithmetic.  Plain `-a' overflows (and is
         therefore undefined) when A is LONG_MIN; the unsigned
         negation is well defined and yields the right magnitude.  */
      magnitude = -(unsigned long) a;
      ++res;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1450
/* Helper macros for long_to_string.  They work on two local
   variables of that function: P, the output cursor, and N, the part
   of the number not printed yet.  FIGURE is the power of ten that
   corresponds to the leading digit of N.

   ONE_DIGIT stores the leading digit; ONE_DIGIT_ADVANCE additionally
   strips it off N.  DIGITS_<k> prints exactly K digits by chaining
   these.  The DIGITS_<k> macros expand to several statements, so
   they must only be used inside braces.  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is completely equivalent
   to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.

   Every value of `long' is handled, including LONG_MIN.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Take the magnitude in unsigned arithmetic.  Negating the
         signed value would overflow for LONG_MIN, leaving a negative
         number that the digit cascade below would print as garbage.  */
      n = -(unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Pick the branch that matches the number of digits, so that each
     digit is produced by straight-line code.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

/* The helper macros are private to long_to_string; don't let them
   leak into the rest of the file.  */

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1559 \f
/* Support for timers. */

#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Exactly one of the three constants above gets defined below,
   depending on the OS and on whether gettimeofday() is available.
   Windows uses TIMER_WINDOWS; other systems use TIMER_GETTIMEOFDAY
   when HAVE_GETTIMEOFDAY is defined (virtually all modern Unix
   systems) and fall back to the catch-all TIMER_TIME otherwise.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else
# define TIMER_TIME
#endif

/* The moment a timer was last reset, stored in whatever form the
   selected timer implementation provides.  */

struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  /* Seconds and microseconds, as reported by gettimeofday().  */
  long secs;
  long usecs;
#elif defined (TIMER_TIME)
  /* Whole seconds, as reported by time().  */
  time_t secs;
#elif defined (TIMER_WINDOWS)
  /* System time converted to a FILETIME value.  */
  ULARGE_INTEGER wintime;
#endif
};
1600
1601 /* Allocate a timer.  It is not legal to do anything with a freshly
1602    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1603
1604 struct wget_timer *
1605 wtimer_allocate (void)
1606 {
1607   struct wget_timer *wt =
1608     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1609   return wt;
1610 }
1611
/* Convenience constructor: allocate a timer, reset it so that it
   starts counting right away, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *fresh = wtimer_allocate ();

  wtimer_reset (fresh);
  return fresh;
}
1621
/* Release timer WT.  The timer must not be used in any way after
   this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  /* The structure owns no other resources; freeing it is enough.  */
  xfree (wt);
}
1630
/* Record the current time in WT.  This is the starting point from
   which wtimer_elapsed() measures the number of elapsed
   milliseconds.  A timer that has been used before may be reset
   again.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined (TIMER_TIME)
  wt->secs = time (NULL);
#elif defined (TIMER_WINDOWS)
  SYSTEMTIME systime;
  FILETIME filetime;

  /* Fetch the system time and convert it to a FILETIME, whose two
     halves are then stored in WT.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1658
/* Return the number of milliseconds elapsed since the timer was last
   reset.  It is allowed to call this function more than once to get
   increasingly higher elapsed values.

   WT must have been reset at least once.  Which of the branches
   below is compiled depends on the TIMER_* constant selected for
   this platform.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds converted to ms, plus the microsecond difference
     converted to ms.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() only has one-second resolution.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  /* Guarded by TIMER_WINDOWS, like the matching code in
     wtimer_reset and the `wintime' member this branch reads.  */
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME values count 100-nanosecond intervals, so dividing the
     difference by 10000 gives milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1688
/* Return the estimated resolution of the timer implementation, in
   milliseconds.  Code that has to cope with "zero" time intervals
   needs to know this.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* The granularity of gettimeofday varies hugely between
     architectures, but on modern machines it appears to be better
     than 1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1713 \f
1714 /* This should probably be at a better place, but it doesn't really
1715    fit into html-parse.c.  */
1716
/* Return a malloc-ed copy of string S in which certain characters
   are replaced with numeric or special graphic entities, as per
   RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  The caller owns the
   returned string.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *out, *res;
  int needed = 0;               /* length of the quoted text, sans \0 */

  /* First pass: work out how long the result will be.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':
        needed += 5;            /* `&amp;' */
        break;
      case '<':
      case '>':
        needed += 4;            /* `&lt;' and `&gt;' */
        break;
      case '\"':
        needed += 6;            /* `&quot;' */
        break;
      case ' ':
        needed += 5;            /* `&#32;' */
        break;
      default:
        needed += 1;
        break;
      }

  res = (char *)xmalloc (needed + 1);

  /* Second pass: produce the quoted text.  */
  out = res;
  for (src = s; *src; src++)
    {
      const char *entity;

      switch (*src)
        {
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        case '\"':
          entity = "&quot;";
          break;
        case ' ':
          entity = "&#32;";
          break;
        default:
          entity = NULL;
          break;
        }

      if (entity)
        while (*entity)
          *out++ = *entity++;
      else
        *out++ = *src;
    }
  *out = '\0';
  return res;
}
1788
/* Return the width, in columns, of the terminal attached to stderr.
   Return 0 when the width cannot be determined: when TIOCGWINSZ is
   not available, when opt.lfilename is set, or when the ioctl
   fails.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* Ask the terminal only when opt.lfilename is unset.  A failing
     ioctl most likely means ENOTTY.  */
  if (opt.lfilename == NULL
      && ioctl (fileno (stderr), TIOCGWINSZ, &wsz) >= 0)
    return wsz.ws_col;
#endif /* TIOCGWINSZ */
  return 0;
}
1813
#if 0
/* Debugging aid for checking whether an MD5 library works: digest
   the string BUF and return the 16-byte result as 32 hexadecimal
   characters in a static buffer.  Disabled by default.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];
  static char res[33];
  char *out = res;
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Two hex characters per digest byte, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      *out++ = XDIGIT_TO_xchar (raw[i] >> 4);
      *out++ = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  *out = '\0';

  return res;
}
#endif