sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 #include "wget.h"
  63 #include "utils.h"
  64 #include "fnmatch.h"
  65 #include "hash.h"
  66
  67 #ifndef errno
  68 extern int errno;
  69 #endif
  70
  71 /* This section implements several wrappers around the basic
  72    allocation routines.  This is done for two reasons: first, so that
  73    the callers of these functions need not consistently check for
  74    errors.  If there is not enough virtual memory for running Wget,
  75    something is seriously wrong, and Wget exits with an appropriate
  76    error message.
  77
  78    The second reason why these are useful is that, if DEBUG_MALLOC is
  79    defined, they also provide a handy (if crude) malloc debugging
  80    interface that checks memory leaks.  */
  81
  82 /* Croak the fatal memory error and bail out with non-zero exit
  83    status.  */
  84 static void
  85 memfatal (const char *what)
  86 {
  87   /* HACK: expose save_log_p from log.c, so we can turn it off in
  88      order to prevent saving the log.  Saving the log is dangerous
  89      because logprintf() and logputs() can call malloc(), so this
  90      could infloop.  When logging is turned off, infloop can no longer
  91      happen.
  92
  93      #### This is no longer really necessary because the new routines
  94      in log.c cons only if the line exceeds eighty characters.  But
  95      this can come at the end of a line, so it's OK to be careful.
  96
  97      On a more serious note, it would be good to have a
  98      log_forced_shutdown() routine that exposes this cleanly.  */
  99   extern int save_log_p;
 100
 101   save_log_p = 0;
 102   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
 103   exit (1);
 104 }
 105
 106 /* These functions end with _real because they need to be
 107    distinguished from the debugging functions, and from the macros.
 108    Explanation follows:
 109
 110    If memory debugging is not turned on, wget.h defines these:
 111
 112      #define xmalloc xmalloc_real
 113      #define xrealloc xrealloc_real
 114      #define xstrdup xstrdup_real
 115      #define xfree free
 116
 117    In case of memory debugging, the definitions are a bit more
 118    complex, because we want to provide more information, *and* we want
 119    to call the debugging code.  (The former is the reason why xmalloc
 120    and friends need to be macros in the first place.)  Then it looks
 121    like this:
 122
 123      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 124      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 125      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 126      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 127
 128    Each of the *_debug function does its magic and calls the real one.  */
 129
 130 #ifdef DEBUG_MALLOC
 131 # define STATIC_IF_DEBUG static
 132 #else
 133 # define STATIC_IF_DEBUG
 134 #endif
 135
 136 STATIC_IF_DEBUG void *
 137 xmalloc_real (size_t size)
 138 {
 139   void *ptr = malloc (size);
 140   if (!ptr)
 141     memfatal ("malloc");
 142   return ptr;
 143 }
 144
 145 STATIC_IF_DEBUG void *
 146 xrealloc_real (void *ptr, size_t newsize)
 147 {
 148   void *newptr;
 149
 150   /* Not all Un*xes have the feature of realloc() that calling it with
 151      a NULL-pointer is the same as malloc(), but it is easy to
 152      simulate.  */
 153   if (ptr)
 154     newptr = realloc (ptr, newsize);
 155   else
 156     newptr = malloc (newsize);
 157   if (!newptr)
 158     memfatal ("realloc");
 159   return newptr;
 160 }
 161
 162 STATIC_IF_DEBUG char *
 163 xstrdup_real (const char *s)
 164 {
 165   char *copy;
 166
 167 #ifndef HAVE_STRDUP
 168   int l = strlen (s);
 169   copy = malloc (l + 1);
 170   if (!copy)
 171     memfatal ("strdup");
 172   memcpy (copy, s, l + 1);
 173 #else  /* HAVE_STRDUP */
 174   copy = strdup (s);
 175   if (!copy)
 176     memfatal ("strdup");
 177 #endif /* HAVE_STRDUP */
 178
 179   return copy;
 180 }
 181
 182 #ifdef DEBUG_MALLOC
 183
 184 /* Crude home-grown routines for debugging some malloc-related
 185    problems.  Featured:
 186
 187    * Counting the number of malloc and free invocations, and reporting
 188      the "balance", i.e. how many times more malloc was called than it
 189      was the case with free.
 190
 191    * Making malloc store its entry into a simple array and free remove
 192      stuff from that array.  At the end, print the pointers which have
 193      not been freed, along with the source file and the line number.
 194      This also has the side-effect of detecting freeing memory that
 195      was never allocated.
 196
 197    Note that this kind of memory leak checking strongly depends on
 198    every malloc() being followed by a free(), even if the program is
 199    about to finish.  Wget is careful to free the data structure it
 200    allocated in init.c.  */
 201
 202 static int malloc_count, free_count;
 203
 204 static struct {
 205   char *ptr;
 206   const char *file;
 207   int line;
 208 } malloc_debug[100000];
 209
 210 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 211    which can be a real problem.  It would be nice to use a hash table
 212    for malloc_debug, but the functions in hash.c are not suitable
 213    because they can call malloc() themselves.  Maybe it would work if
 214    the hash table were preallocated to a huge size, and if we set the
 215    rehash threshold to 1.0.  */
 216
 217 /* Register PTR in malloc_debug.  Abort if this is not possible
 218    (presumably due to the number of current allocations exceeding the
 219    size of malloc_debug.)  */
 220
 221 static void
 222 register_ptr (void *ptr, const char *file, int line)
 223 {
 224   int i;
 225   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 226     if (malloc_debug[i].ptr == NULL)
 227       {
 228         malloc_debug[i].ptr = ptr;
 229         malloc_debug[i].file = file;
 230         malloc_debug[i].line = line;
 231         return;
 232       }
 233   abort ();
 234 }
 235
 236 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 237    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 238
 239 static void
 240 unregister_ptr (void *ptr)
 241 {
 242   int i;
 243   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 244     if (malloc_debug[i].ptr == ptr)
 245       {
 246         malloc_debug[i].ptr = NULL;
 247         return;
 248       }
 249   abort ();
 250 }
 251
 252 /* Print the malloc debug stats that can be gathered from the above
 253    information.  Currently this is the count of mallocs, frees, the
 254    difference between the two, and the dump of the contents of
 255    malloc_debug.  The last part are the memory leaks.  */
 256
 257 void
 258 print_malloc_debug_stats (void)
 259 {
 260   int i;
 261   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 262           malloc_count, free_count, malloc_count - free_count);
 263   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 264     if (malloc_debug[i].ptr != NULL)
 265       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 266               malloc_debug[i].file, malloc_debug[i].line);
 267 }
 268
 269 void *
 270 xmalloc_debug (size_t size, const char *source_file, int source_line)
 271 {
 272   void *ptr = xmalloc_real (size);
 273   ++malloc_count;
 274   register_ptr (ptr, source_file, source_line);
 275   return ptr;
 276 }
 277
 278 void
 279 xfree_debug (void *ptr, const char *source_file, int source_line)
 280 {
 281   assert (ptr != NULL);
 282   ++free_count;
 283   unregister_ptr (ptr);
 284   free (ptr);
 285 }
 286
 287 void *
 288 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 289 {
 290   void *newptr = xrealloc_real (ptr, newsize);
 291   if (!ptr)
 292     {
 293       ++malloc_count;
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   else if (newptr != ptr)
 297     {
 298       unregister_ptr (ptr);
 299       register_ptr (newptr, source_file, source_line);
 300     }
 301   return newptr;
 302 }
 303
 304 char *
 305 xstrdup_debug (const char *s, const char *source_file, int source_line)
 306 {
 307   char *copy = xstrdup_real (s);
 308   ++malloc_count;
 309   register_ptr (copy, source_file, source_line);
 310   return copy;
 311 }
 312
 313 #endif /* DEBUG_MALLOC */
 314 \f
 315 /* Utility function: like xstrdup(), but also lowercases S.  */
 316
 317 char *
 318 xstrdup_lower (const char *s)
 319 {
 320   char *copy = xstrdup (s);
 321   char *p = copy;
 322   for (; *p; p++)
 323     *p = TOLOWER (*p);
 324   return copy;
 325 }
 326
 327 /* Return a count of how many times CHR occurs in STRING. */
 328
 329 int
 330 count_char (const char *string, char chr)
 331 {
 332   const char *p;
 333   int count = 0;
 334   for (p = string; *p; p++)
 335     if (*p == chr)
 336       ++count;
 337   return count;
 338 }
 339
 340 /* Copy the string formed by two pointers (one on the beginning, other
 341    on the char after the last char) to a new, malloc-ed location.
 342    0-terminate it.  */
 343 char *
 344 strdupdelim (const char *beg, const char *end)
 345 {
 346   char *res = (char *)xmalloc (end - beg + 1);
 347   memcpy (res, beg, end - beg);
 348   res[end - beg] = '\0';
 349   return res;
 350 }
 351
 352 /* Parse a string containing comma-separated elements, and return a
 353    vector of char pointers with the elements.  Spaces following the
 354    commas are ignored.  */
 355 char **
 356 sepstring (const char *s)
 357 {
 358   char **res;
 359   const char *p;
 360   int i = 0;
 361
 362   if (!s || !*s)
 363     return NULL;
 364   res = NULL;
 365   p = s;
 366   while (*s)
 367     {
 368       if (*s == ',')
 369         {
 370           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 371           res[i] = strdupdelim (p, s);
 372           res[++i] = NULL;
 373           ++s;
 374           /* Skip the blanks following the ','.  */
 375           while (ISSPACE (*s))
 376             ++s;
 377           p = s;
 378         }
 379       else
 380         ++s;
 381     }
 382   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 383   res[i] = strdupdelim (p, s);
 384   res[i + 1] = NULL;
 385   return res;
 386 }
 387 \f
 388 /* Return pointer to a static char[] buffer in which zero-terminated
 389    string-representation of TM (in form hh:mm:ss) is printed.
 390
 391    If TM is non-NULL, the current time-in-seconds will be stored
 392    there.
 393
 394    (#### This is misleading: one would expect TM would be used instead
 395    of the current time in that case.  This design was probably
 396    influenced by the design time(2), and should be changed at some
 397    points.  No callers use non-NULL TM anyway.)  */
 398
 399 char *
 400 time_str (time_t *tm)
 401 {
 402   static char output[15];
 403   struct tm *ptm;
 404   time_t secs = time (tm);
 405
 406   if (secs == -1)
 407     {
 408       /* In case of error, return the empty string.  Maybe we should
 409          just abort if this happens?  */
 410       *output = '\0';
 411       return output;
 412     }
 413   ptm = localtime (&secs);
 414   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 415   return output;
 416 }
 417
 418 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 419
 420 char *
 421 datetime_str (time_t *tm)
 422 {
 423   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 424   struct tm *ptm;
 425   time_t secs = time (tm);
 426
 427   if (secs == -1)
 428     {
 429       /* In case of error, return the empty string.  Maybe we should
 430          just abort if this happens?  */
 431       *output = '\0';
 432       return output;
 433     }
 434   ptm = localtime (&secs);
 435   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 436            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 437            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 438   return output;
 439 }
 440 \f
 441 /* The Windows versions of the following two functions are defined in
 442    mswindows.c.  */
 443
 444 #ifndef WINDOWS
 445 void
 446 fork_to_background (void)
 447 {
 448   pid_t pid;
 449   /* Whether we arrange our own version of opt.lfilename here.  */
 450   int changedp = 0;
 451
 452   if (!opt.lfilename)
 453     {
 454       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 455       changedp = 1;
 456     }
 457   pid = fork ();
 458   if (pid < 0)
 459     {
 460       /* parent, error */
 461       perror ("fork");
 462       exit (1);
 463     }
 464   else if (pid != 0)
 465     {
 466       /* parent, no error */
 467       printf (_("Continuing in background.\n"));
 468       if (changedp)
 469         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 470       exit (0);
 471     }
 472   /* child: keep running */
 473 }
 474 #endif /* not WINDOWS */
 475 \f
 476 #if 0
 477 /* debug */
 478 char *
 479 ps (char *orig)
 480 {
 481   char *r = xstrdup (orig);
 482   path_simplify (r);
 483   return r;
 484 }
 485 #endif
 486
 487 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 488    in that:
 489         Multple `/'s are collapsed to a single `/'.
 490         Leading `./'s and trailing `/.'s are removed.
 491         Trailing `/'s are removed.
 492         Non-leading `../'s and trailing `..'s are handled by removing
 493         portions of the path.
 494
 495    E.g. "a/b/c/./../d/.." will yield "a/b/".  This function originates
 496    from GNU Bash and has been mutilated to unrecognition for use in
 497    Wget.
 498
 499    Changes for Wget:
 500         Always use '/' as stub_char.
 501         Don't check for local things using canon_stat.
 502         Change the original string instead of strdup-ing.
 503         React correctly when beginning with `./' and `../'.
 504         Don't zip out trailing slashes.
 505         Return a value indicating whether any modifications took place.
 506
 507    If you dare change this function, take a careful look at the test
 508    cases below, and make sure that they pass.  */
 509
 510 int
 511 path_simplify (char *path)
 512 {
 513   register int i, start;
 514   int changes = 0;
 515
 516   if (!*path)
 517     return 0;
 518
 519   if (path[0] == '/')
 520     /* Preserve initial '/'. */
 521     ++path;
 522
 523   /* Nix out leading `.' or `..' with.  */
 524   if ((path[0] == '.' && path[1] == '\0')
 525       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 526     {
 527       path[0] = '\0';
 528       changes = 1;
 529       return changes;
 530     }
 531
 532   /* Walk along PATH looking for things to compact.  */
 533   i = 0;
 534   while (1)
 535     {
 536       if (!path[i])
 537         break;
 538
 539       while (path[i] && path[i] != '/')
 540         i++;
 541
 542       start = i++;
 543
 544       /* If we didn't find any slashes, then there is nothing left to do.  */
 545       if (!path[start])
 546         break;
 547
 548       /* Handle multiple `/'s in a row.  */
 549       while (path[i] == '/')
 550         i++;
 551
 552       if ((start + 1) != i)
 553         {
 554           strcpy (path + start + 1, path + i);
 555           i = start + 1;
 556           changes = 1;
 557         }
 558
 559       /* Check for `../', `./' or trailing `.' by itself.  */
 560       if (path[i] == '.')
 561         {
 562           /* Handle trailing `.' by itself.  */
 563           if (!path[i + 1])
 564             {
 565               path[--i] = '\0';
 566               changes = 1;
 567               break;
 568             }
 569
 570           /* Handle `./'.  */
 571           if (path[i + 1] == '/')
 572             {
 573               strcpy (path + i, path + i + 1);
 574               i = (start < 0) ? 0 : start;
 575               changes = 1;
 576               continue;
 577             }
 578
 579           /* Handle `../' or trailing `..' by itself.  */
 580           if (path[i + 1] == '.' &&
 581               (path[i + 2] == '/' || !path[i + 2]))
 582             {
 583               while (--start > -1 && path[start] != '/');
 584               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 585               i = (start < 0) ? 0 : start;
 586               changes = 1;
 587               continue;
 588             }
 589         }       /* path == '.' */
 590     } /* while */
 591
 592   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 593   i = 0;
 594   while (1)
 595     {
 596       if (path[i] == '.' && path[i + 1] == '/')
 597         i += 2;
 598       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 599         i += 3;
 600       else
 601         break;
 602     }
 603   if (i)
 604     {
 605       strcpy (path, path + i - 0);
 606       changes = 1;
 607     }
 608
 609   return changes;
 610 }
 611
 612 /* Test cases:
 613    ps("")                   -> ""
 614    ps("/")                  -> "/"
 615    ps(".")                  -> ""
 616    ps("..")                 -> ""
 617    ps("/.")                 -> "/"
 618    ps("/..")                -> "/"
 619    ps("foo")                -> "foo"
 620    ps("foo/bar")            -> "foo/bar"
 621    ps("foo//bar")           -> "foo/bar"             (possibly a bug)
 622    ps("foo/../bar")         -> "bar"
 623    ps("foo/bar/..")         -> "foo/"
 624    ps("foo/bar/../x")       -> "foo/x"
 625    ps("foo/bar/../x/")      -> "foo/x/"
 626    ps("foo/..")             -> ""
 627    ps("/foo/..")            -> "/"
 628    ps("a/b/../../c")        -> "c"
 629    ps("/a/b/../../c")       -> "/c"
 630    ps("./a/../b")           -> "b"
 631    ps("/./a/../b")          -> "/b"
 632 */
 633 \f
 634 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 635    specified with TM.  */
 636 void
 637 touch (const char *file, time_t tm)
 638 {
 639 #ifdef HAVE_STRUCT_UTIMBUF
 640   struct utimbuf times;
 641   times.actime = times.modtime = tm;
 642 #else
 643   time_t times[2];
 644   times[0] = times[1] = tm;
 645 #endif
 646
 647   if (utime (file, &times) == -1)
 648     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 649 }
 650
 651 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 652    nothing under MS-Windows.  */
 653 int
 654 remove_link (const char *file)
 655 {
 656   int err = 0;
 657   struct stat st;
 658
 659   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 660     {
 661       DEBUGP (("Unlinking %s (symlink).\n", file));
 662       err = unlink (file);
 663       if (err != 0)
 664         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 665                    file, strerror (errno));
 666     }
 667   return err;
 668 }
 669
 670 /* Does FILENAME exist?  This is quite a lousy implementation, since
 671    it supplies no error codes -- only a yes-or-no answer.  Thus it
 672    will return that a file does not exist if, e.g., the directory is
 673    unreadable.  I don't mind it too much currently, though.  The
 674    proper way should, of course, be to have a third, error state,
 675    other than true/false, but that would introduce uncalled-for
 676    additional complexity to the callers.  */
 677 int
 678 file_exists_p (const char *filename)
 679 {
 680 #ifdef HAVE_ACCESS
 681   return access (filename, F_OK) >= 0;
 682 #else
 683   struct stat buf;
 684   return stat (filename, &buf) >= 0;
 685 #endif
 686 }
 687
 688 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 689    Returns 0 on error.  */
 690 int
 691 file_non_directory_p (const char *path)
 692 {
 693   struct stat buf;
 694   /* Use lstat() rather than stat() so that symbolic links pointing to
 695      directories can be identified correctly.  */
 696   if (lstat (path, &buf) != 0)
 697     return 0;
 698   return S_ISDIR (buf.st_mode) ? 0 : 1;
 699 }
 700
 701 /* Return a unique filename, given a prefix and count */
 702 static char *
 703 unique_name_1 (const char *fileprefix, int count)
 704 {
 705   char *filename;
 706
 707   if (count)
 708     {
 709       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 710       sprintf (filename, "%s.%d", fileprefix, count);
 711     }
 712   else
 713     filename = xstrdup (fileprefix);
 714
 715   if (!file_exists_p (filename))
 716     return filename;
 717   else
 718     {
 719       xfree (filename);
 720       return NULL;
 721     }
 722 }
 723
 724 /* Return a unique file name, based on PREFIX.  */
 725 char *
 726 unique_name (const char *prefix)
 727 {
 728   char *file = NULL;
 729   int count = 0;
 730
 731   while (!file)
 732     file = unique_name_1 (prefix, count++);
 733   return file;
 734 }
 735 \f
 736 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 737    are missing, create them first.  In case any mkdir() call fails,
 738    return its error status.  Returns 0 on successful completion.
 739
 740    The behaviour of this function should be identical to the behaviour
 741    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 742 int
 743 make_directory (const char *directory)
 744 {
 745   int quit = 0;
 746   int i;
 747   char *dir;
 748
 749   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 750      function is unsafe if called with a read-only char *argument.  */
 751   STRDUP_ALLOCA (dir, directory);
 752
 753   /* If the first character of dir is '/', skip it (and thus enable
 754      creation of absolute-pathname directories.  */
 755   for (i = (*dir == '/'); 1; ++i)
 756     {
 757       for (; dir[i] && dir[i] != '/'; i++)
 758         ;
 759       if (!dir[i])
 760         quit = 1;
 761       dir[i] = '\0';
 762       /* Check whether the directory already exists.  */
 763       if (!file_exists_p (dir))
 764         {
 765           if (mkdir (dir, 0777) < 0)
 766             return -1;
 767         }
 768       if (quit)
 769         break;
 770       else
 771         dir[i] = '/';
 772     }
 773   return 0;
 774 }
 775
 776 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 777    should be a file name.
 778
 779    file_merge("/foo/bar", "baz")  => "/foo/baz"
 780    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 781    file_merge("foo", "bar")       => "bar"
 782
 783    In other words, it's a simpler and gentler version of uri_merge_1.  */
 784
 785 char *
 786 file_merge (const char *base, const char *file)
 787 {
 788   char *result;
 789   const char *cut = (const char *)strrchr (base, '/');
 790
 791   if (!cut)
 792     return xstrdup (file);
 793
 794   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 795   memcpy (result, base, cut - base);
 796   result[cut - base] = '/';
 797   strcpy (result + (cut - base) + 1, file);
 798
 799   return result;
 800 }
 801 \f
 802 static int in_acclist PARAMS ((const char *const *, const char *, int));
 803
 804 /* Determine whether a file is acceptable to be followed, according to
 805    lists of patterns to accept/reject.  */
 806 int
 807 acceptable (const char *s)
 808 {
 809   int l = strlen (s);
 810
 811   while (l && s[l] != '/')
 812     --l;
 813   if (s[l] == '/')
 814     s += (l + 1);
 815   if (opt.accepts)
 816     {
 817       if (opt.rejects)
 818         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 819                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 820       else
 821         return in_acclist ((const char *const *)opt.accepts, s, 1);
 822     }
 823   else if (opt.rejects)
 824     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 825   return 1;
 826 }
 827
 828 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 829    `/something', frontcmp() will return 1 only if S2 begins with
 830    `/something'.  Otherwise, 0 is returned.  */
 831 int
 832 frontcmp (const char *s1, const char *s2)
 833 {
 834   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 835   return !*s1;
 836 }
 837
 838 /* Iterate through STRLIST, and return the first element that matches
 839    S, through wildcards or front comparison (as appropriate).  */
 840 static char *
 841 proclist (char **strlist, const char *s, enum accd flags)
 842 {
 843   char **x;
 844
 845   for (x = strlist; *x; x++)
 846     if (has_wildcards_p (*x))
 847       {
 848         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 849           break;
 850       }
 851     else
 852       {
 853         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 854         if (frontcmp (p, s))
 855           break;
 856       }
 857   return *x;
 858 }
 859
 860 /* Returns whether DIRECTORY is acceptable for download, wrt the
 861    include/exclude lists.
 862
 863    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 864    and absolute paths may be freely intermixed.  */
 865 int
 866 accdir (const char *directory, enum accd flags)
 867 {
 868   /* Remove starting '/'.  */
 869   if (flags & ALLABS && *directory == '/')
 870     ++directory;
 871   if (opt.includes)
 872     {
 873       if (!proclist (opt.includes, directory, flags))
 874         return 0;
 875     }
 876   if (opt.excludes)
 877     {
 878       if (proclist (opt.excludes, directory, flags))
 879         return 0;
 880     }
 881   return 1;
 882 }
 883
 884 /* Match the end of STRING against PATTERN.  For instance:
 885
 886    match_backwards ("abc", "bc") -> 1
 887    match_backwards ("abc", "ab") -> 0
 888    match_backwards ("abc", "abc") -> 1 */
 889 int
 890 match_tail (const char *string, const char *pattern)
 891 {
 892   int i, j;
 893
 894   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 895     if (string[i] != pattern[j])
 896       break;
 897   /* If the pattern was exhausted, the match was succesful.  */
 898   if (j == -1)
 899     return 1;
 900   else
 901     return 0;
 902 }
 903
 904 /* Checks whether string S matches each element of ACCEPTS.  A list
 905    element are matched either with fnmatch() or match_tail(),
 906    according to whether the element contains wildcards or not.
 907
 908    If the BACKWARD is 0, don't do backward comparison -- just compare
 909    them normally.  */
 910 static int
 911 in_acclist (const char *const *accepts, const char *s, int backward)
 912 {
 913   for (; *accepts; accepts++)
 914     {
 915       if (has_wildcards_p (*accepts))
 916         {
 917           /* fnmatch returns 0 if the pattern *does* match the
 918              string.  */
 919           if (fnmatch (*accepts, s, 0) == 0)
 920             return 1;
 921         }
 922       else
 923         {
 924           if (backward)
 925             {
 926               if (match_tail (s, *accepts))
 927                 return 1;
 928             }
 929           else
 930             {
 931               if (!strcmp (s, *accepts))
 932                 return 1;
 933             }
 934         }
 935     }
 936   return 0;
 937 }
 938
 939 /* Return the location of STR's suffix (file extension).  Examples:
 940    suffix ("foo.bar")       -> "bar"
 941    suffix ("foo.bar.baz")   -> "baz"
 942    suffix ("/foo/bar")      -> NULL
 943    suffix ("/foo.bar/baz")  -> NULL  */
 944 char *
 945 suffix (const char *str)
 946 {
 947   int i;
 948
 949   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 950     ;
 951
 952   if (str[i++] == '.')
 953     return (char *)str + i;
 954   else
 955     return NULL;
 956 }
 957
 958 /* Read a line from FP.  The function reallocs the storage as needed
 959    to accomodate for any length of the line.  Reallocs are done
 960    exponentially, doubling the storage after each overflow to minimize
 961    the number of calls to realloc() and fgets().  The newline
 962    character at the end of line is retained.
 963
 964    After end-of-file is encountered without anything being read, NULL
 965    is returned.  NULL is also returned on error.  To distinguish
 966    between these two cases, use the stdio function ferror().
 967
 968    A future version of this function will be rewritten to use fread()
 969    instead of fgets(), and to return the length of the line, which
 970    will make the function usable on files with binary content.  */
 971
 972 char *
 973 read_whole_line (FILE *fp)
 974 {
 975   int length = 0;
 976   int bufsize = 81;
 977   char *line = (char *)xmalloc (bufsize);
 978
 979   while (fgets (line + length, bufsize - length, fp))
 980     {
 981       length += strlen (line + length);
 982       if (length == 0)
 983         /* Possible for example when reading from a binary file where
 984            a line begins with \0.  */
 985         continue;
 986
 987       if (line[length - 1] == '\n')
 988         break;
 989
 990       /* fgets() guarantees to read the whole line, or to use up the
 991          space we've given it.  We can double the buffer
 992          unconditionally.  */
 993       bufsize <<= 1;
 994       line = xrealloc (line, bufsize);
 995     }
 996   if (length == 0 || ferror (fp))
 997     {
 998       xfree (line);
 999       return NULL;
1000     }
1001   if (length + 1 < bufsize)
1002     /* Relieve the memory from our exponential greediness.  We say
1003        `length + 1' because the terminating \0 is not included in
1004        LENGTH.  We don't need to zero-terminate the string ourselves,
1005        though, because fgets() does that.  */
1006     line = xrealloc (line, length + 1);
1007   return line;
1008 }
1009 \f
1010 /* Read FILE into memory.  A pointer to `struct file_memory' is
1011    returned; use struct element `content' to access file contents, and
1012    the element `length' to know the file length.  `content' is *not*
1013    zero-terminated, and you should *not* read or write beyond the [0,
1014    length) range of characters.
1015
1016    After you are done with the file contents, call read_file_free to
1017    release the memory.
1018
1019    Depending on the operating system and the type of file that is
1020    being read, read_file() either mmap's the file into memory, or
1021    reads the file into the core using read().
1022
1023    If file is named "-", fileno(stdin) is used for reading instead.
1024    If you want to read from a real file named "-", use "./-" instead.  */
1025
1026 struct file_memory *
1027 read_file (const char *file)
1028 {
1029   int fd;
1030   struct file_memory *fm;
1031   long size;
1032   int inhibit_close = 0;
1033
1034   /* Some magic in the finest tradition of Perl and its kin: if FILE
1035      is "-", just use stdin.  */
1036   if (HYPHENP (file))
1037     {
1038       fd = fileno (stdin);
1039       inhibit_close = 1;
1040       /* Note that we don't inhibit mmap() in this case.  If stdin is
1041          redirected from a regular file, mmap() will still work.  */
1042     }
1043   else
1044     fd = open (file, O_RDONLY);
1045   if (fd < 0)
1046     return NULL;
1047   fm = xmalloc (sizeof (struct file_memory));
1048
1049 #ifdef HAVE_MMAP
1050   {
1051     struct stat buf;
1052     if (fstat (fd, &buf) < 0)
1053       goto mmap_lose;
1054     fm->length = buf.st_size;
1055     /* NOTE: As far as I know, the callers of this function never
1056        modify the file text.  Relying on this would enable us to
1057        specify PROT_READ and MAP_SHARED for a marginal gain in
1058        efficiency, but at some cost to generality.  */
1059     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1060                         MAP_PRIVATE, fd, 0);
1061     if (fm->content == (char *)MAP_FAILED)
1062       goto mmap_lose;
1063     if (!inhibit_close)
1064       close (fd);
1065
1066     fm->mmap_p = 1;
1067     return fm;
1068   }
1069
1070  mmap_lose:
1071   /* The most common reason why mmap() fails is that FD does not point
1072      to a plain file.  However, it's also possible that mmap() doesn't
1073      work for a particular type of file.  Therefore, whenever mmap()
1074      fails, we just fall back to the regular method.  */
1075 #endif /* HAVE_MMAP */
1076
1077   fm->length = 0;
1078   size = 512;                   /* number of bytes fm->contents can
1079                                    hold at any given time. */
1080   fm->content = xmalloc (size);
1081   while (1)
1082     {
1083       long nread;
1084       if (fm->length > size / 2)
1085         {
1086           /* #### I'm not sure whether the whole exponential-growth
1087              thing makes sense with kernel read.  On Linux at least,
1088              read() refuses to read more than 4K from a file at a
1089              single chunk anyway.  But other Unixes might optimize it
1090              better, and it doesn't *hurt* anything, so I'm leaving
1091              it.  */
1092
1093           /* Normally, we grow SIZE exponentially to make the number
1094              of calls to read() and realloc() logarithmic in relation
1095              to file size.  However, read() can read an amount of data
1096              smaller than requested, and it would be unreasonably to
1097              double SIZE every time *something* was read.  Therefore,
1098              we double SIZE only when the length exceeds half of the
1099              entire allocated size.  */
1100           size <<= 1;
1101           fm->content = xrealloc (fm->content, size);
1102         }
1103       nread = read (fd, fm->content + fm->length, size - fm->length);
1104       if (nread > 0)
1105         /* Successful read. */
1106         fm->length += nread;
1107       else if (nread < 0)
1108         /* Error. */
1109         goto lose;
1110       else
1111         /* EOF */
1112         break;
1113     }
1114   if (!inhibit_close)
1115     close (fd);
1116   if (size > fm->length && fm->length != 0)
1117     /* Due to exponential growth of fm->content, the allocated region
1118        might be much larger than what is actually needed.  */
1119     fm->content = xrealloc (fm->content, fm->length);
1120   fm->mmap_p = 0;
1121   return fm;
1122
1123  lose:
1124   if (!inhibit_close)
1125     close (fd);
1126   xfree (fm->content);
1127   xfree (fm);
1128   return NULL;
1129 }
1130
1131 /* Release the resources held by FM.  Specifically, this calls
1132    munmap() or xfree() on fm->content, depending whether mmap or
1133    malloc/read were used to read in the file.  It also frees the
1134    memory needed to hold the FM structure itself.  */
1135
1136 void
1137 read_file_free (struct file_memory *fm)
1138 {
1139 #ifdef HAVE_MMAP
1140   if (fm->mmap_p)
1141     {
1142       munmap (fm->content, fm->length);
1143     }
1144   else
1145 #endif
1146     {
1147       xfree (fm->content);
1148     }
1149   xfree (fm);
1150 }
1151 \f
/* Release a NULL-terminated vector of strings: every element is
   freed first, then the vector itself.  A NULL VEC is accepted and
   ignored.  */
void
free_vec (char **vec)
{
  int idx;

  if (vec == NULL)
    return;

  for (idx = 0; vec[idx] != NULL; idx++)
    xfree (vec[idx]);
  xfree (vec);
}
1165
/* Append vector V2 to vector V1.  Both are NULL-terminated vectors
   of pointers.  The function reallocates V1 and frees V2 (only the
   vector, not the strings, which are now owned by the result), so
   neither pointer may be used after the call; use the return value
   instead.

   If V1 is NULL, V2 is returned.  If V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to copy, so skip the realloc.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The element size is taken from the vector itself, so it
     is the size of a `char *', not of a `char **'.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy V2's elements, including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1194
1195 /* A set of simple-minded routines to store strings in a linked list.
1196    This used to also be used for searching, but now we have hash
1197    tables for that.  */
1198
1199 /* It's a shame that these simple things like linked lists and hash
1200    tables (see hash.c) need to be implemented over and over again.  It
1201    would be nice to be able to use the routines from glib -- see
1202    www.gtk.org for details.  However, that would make Wget depend on
1203    glib, and I want to avoid dependencies to external libraries for
1204    reasons of convenience and portability (I suspect Wget is more
1205    portable than anything ever written for Gnome).  */
1206
1207 /* Append an element to the list.  If the list has a huge number of
1208    elements, this can get slow because it has to find the list's
1209    ending.  If you think you have to call slist_append in a loop,
1210    think about calling slist_prepend() followed by slist_nreverse().  */
1211
1212 slist *
1213 slist_append (slist *l, const char *s)
1214 {
1215   slist *newel = (slist *)xmalloc (sizeof (slist));
1216   slist *beg = l;
1217
1218   newel->string = xstrdup (s);
1219   newel->next = NULL;
1220
1221   if (!l)
1222     return newel;
1223   /* Find the last element.  */
1224   while (l->next)
1225     l = l->next;
1226   l->next = newel;
1227   return beg;
1228 }
1229
1230 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1231
1232 slist *
1233 slist_prepend (slist *l, const char *s)
1234 {
1235   slist *newel = (slist *)xmalloc (sizeof (slist));
1236   newel->string = xstrdup (s);
1237   newel->next = l;
1238   return newel;
1239 }
1240
1241 /* Destructively reverse L. */
1242
1243 slist *
1244 slist_nreverse (slist *l)
1245 {
1246   slist *prev = NULL;
1247   while (l)
1248     {
1249       slist *next = l->next;
1250       l->next = prev;
1251       prev = l;
1252       l = next;
1253     }
1254   return prev;
1255 }
1256
1257 /* Is there a specific entry in the list?  */
1258 int
1259 slist_contains (slist *l, const char *s)
1260 {
1261   for (; l; l = l->next)
1262     if (!strcmp (l->string, s))
1263       return 1;
1264   return 0;
1265 }
1266
1267 /* Free the whole slist.  */
1268 void
1269 slist_free (slist *l)
1270 {
1271   while (l)
1272     {
1273       slist *n = l->next;
1274       xfree (l->string);
1275       xfree (l);
1276       l = n;
1277     }
1278 }
1279 \f
/* Sometimes it's useful to create "sets" of strings, i.e. hash
   tables in which strings are stored as keys and the only question
   ever asked is whether a key exists.  The following utility
   routines make that transparent.  */

/* Add string S to the set HT.  The set stores its own copy of S.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not a member yet.  That way we never have
     to free() an old key just to strdup() an identical new one.

     The value stored is the string literal "1": a clear, arbitrary
     value that consumes no extra memory, since every key in every
     `set' hash table points to the same literal.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1300
/* Report whether S is a member of the set HT.  This simply forwards
   to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}
1308
/* Mapper for string_set_free: frees KEY.  The value and the extra
   argument are not used.  Always returns 0.  */

static int
string_set_free_mapper (void *key, void *value_unused, void *arg_unused)
{
  xfree (key);
  return 0;
}

/* Destroy the string set HT: free all the keys via
   string_set_free_mapper, then destroy the hash table itself.  */

void
string_set_free (struct hash_table *ht)
{
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1322
/* Mapper for free_keys_and_values: frees both KEY and VALUE.  The
   extra argument is not used.  Always returns 0.  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_unused)
{
  xfree (key);
  xfree (value);
  return 0;
}

/* Free every key and every value stored in HT.  The hash table
   itself is not destroyed by this function.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1338
1339 \f
/* Workhorse behind legible and legible_very_long.  REPR is the
   decimal text of a number, optionally starting with `-'.  The
   result is the same text with a `,' inserted between groups of
   three characters counted from the right, e.g. "1234567" becomes
   "1,234,567".

   The result lives in a static buffer of 128 bytes that is
   overwritten by every call; no bounds checking is done.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *src = repr;
  size_t remaining;             /* characters of SRC still to copy */

  /* A leading minus sign is copied as-is and takes no part in the
     grouping.  */
  if (*src == '-')
    *dst++ = *src++;

  for (remaining = strlen (src); remaining > 0; remaining--)
    {
      *dst++ = *src++;
      /* A separator follows whenever the number of characters still
         to come is a non-zero multiple of three.  */
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *dst++ = ',';
    }

  /* Zero-terminate the string.  */
  *dst = '\0';
  return outbuf;
}
1377
/* Return L printed legibly, i.e. with thousands separators.  The
   result is the static buffer of legible_1 and is overwritten by the
   next call.  */
char *
legible (long l)
{
  char digits[24];

  /* First the plain decimal form, then the grouping.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1387
1388 /* Write a string representation of NUMBER into the provided buffer.
1389    We cannot use sprintf() because we cannot be sure whether the
1390    platform supports printing of what we chose for VERY_LONG_TYPE.
1391
1392    Example: Gcc supports `long long' under many platforms, but on many
1393    of those the native libc knows nothing of it and therefore cannot
1394    print it.
1395
1396    How long BUFFER needs to be depends on the platform and the content
1397    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1398    bytes are sufficient.  Using more might be a good idea.
1399
1400    This function does not go through the hoops that long_to_string
1401    goes to because it doesn't aspire to be fast.  (It's called perhaps
1402    once in a Wget run.)  */
1403
1404 static void
1405 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1406 {
1407   int i = 0;
1408   int j;
1409
1410   /* Print the number backwards... */
1411   do
1412     {
1413       buffer[i++] = '0' + number % 10;
1414       number /= 10;
1415     }
1416   while (number);
1417
1418   /* ...and reverse the order of the digits. */
1419   for (j = 0; j < i / 2; j++)
1420     {
1421       char c = buffer[j];
1422       buffer[j] = buffer[i - 1 - j];
1423       buffer[i - 1 - j] = c;
1424     }
1425   buffer[i] = '\0';
1426 }
1427
1428 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1429 char *
1430 legible_very_long (VERY_LONG_TYPE l)
1431 {
1432   char inbuf[128];
1433   /* Print the number into the buffer.  */
1434   very_long_to_string (inbuf, l);
1435   return legible_1 (inbuf);
1436 }
1437
/* Count the characters needed to print the (long) integer A in base
   10: one per digit, plus one for the minus sign if A is negative.
   Zero counts as one digit.  */
int
numdigit (long a)
{
  int res = 1;
  unsigned long magnitude;

  if (a < 0)
    {
      /* Take the absolute value in unsigned arithmetic.  Negating A
         itself would overflow for LONG_MIN, whose absolute value is
         not representable as a long.  */
      magnitude = 0UL - (unsigned long) a;
      /* Account for the minus sign.  */
      ++res;
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1452
/* Helper macros for long_to_string.  They operate on its local
   variables N (the remaining magnitude) and P (the output cursor).

   ONE_DIGIT (figure) stores the digit N / FIGURE through P.
   ONE_DIGIT_ADVANCE (figure) additionally reduces N modulo FIGURE.
   DIGITS_<k> (figure) stores K consecutive digits, starting with the
   one selected by FIGURE and dividing FIGURE by 10 at each step.  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is completely equivalent
   to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.

   Every value of NUMBER is handled, including LONG_MIN.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  /* The magnitude of NUMBER.  It is kept unsigned because the
     absolute value of LONG_MIN does not fit in a long; negating the
     long itself would overflow, which is undefined behavior.  */
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Pick the macro that matches the number of digits in N; it is
     handed the power of ten that selects the leading digit.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1561 \f
/* Support for timers. */

/* Start from a clean slate so that exactly one back end ends up
   selected below.  */
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Exactly one of the three constants above gets defined, depending
   on the OS and on whether gettimeofday() is available:

     TIMER_WINDOWS       when WINDOWS is defined;
     TIMER_GETTIMEOFDAY  otherwise, when HAVE_GETTIMEOFDAY is defined
                         (virtually all modern Unix systems);
     TIMER_TIME          otherwise -- the catch-all for non-Windows
                         systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined WINDOWS
# define TIMER_WINDOWS
#elif defined HAVE_GETTIMEOFDAY
# define TIMER_GETTIMEOFDAY
#else
# define TIMER_TIME
#endif
1587
/* The state of a timer: the moment of its last reset, stored in
   whatever form the selected timer back end provides.  */
struct wget_timer {
#if defined TIMER_GETTIMEOFDAY
  /* The tv_sec and tv_usec values obtained from gettimeofday.  */
  long secs;
  long usecs;
#elif defined TIMER_TIME
  /* The value obtained from time().  */
  time_t secs;
#elif defined TIMER_WINDOWS
  /* The system time converted to a FILETIME, as one 64-bit value.  */
  ULARGE_INTEGER wintime;
#endif
};
1602
1603 /* Allocate a timer.  It is not legal to do anything with a freshly
1604    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1605
1606 struct wget_timer *
1607 wtimer_allocate (void)
1608 {
1609   struct wget_timer *wt =
1610     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1611   return wt;
1612 }
1613
/* Create a timer that is ready for use: allocate it with
   wtimer_allocate, start it with wtimer_reset, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1623
/* Release the memory occupied by timer WT.  WT must not be used in
   any way after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1632
/* (Re)start timer WT by recording the current time in it.  That
   moment becomes the starting point from which wtimer_elapsed()
   measures elapsed milliseconds.  Resetting a timer that has already
   been used is allowed.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined TIMER_GETTIMEOFDAY
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined TIMER_TIME
  wt->secs = time (NULL);
#elif defined TIMER_WINDOWS
  SYSTEMTIME systime;
  FILETIME filetime;

  /* The system time is converted to a FILETIME, whose two halves
     are then stored as a single 64-bit quantity.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1660
/* Return the number of milliseconds elapsed since timer WT was last
   reset.  The timer is not modified, so the function may be called
   repeatedly to obtain increasingly higher elapsed values.

   Each back end is selected by its TIMER_* constant, the same one
   that governs the layout of struct wget_timer and the code in
   wtimer_reset.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds scaled up to milliseconds, plus the microsecond
     difference (which may be negative) scaled down.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* NOTE(review): presumably FILETIME counts 100-nanosecond units,
     which is why dividing by 10000 yields milliseconds -- confirm
     against the Windows documentation.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1690
/* Return the assumed granularity of the timer implementation, in
   milliseconds.  This matters to code that has to deal with "zero"
   time intervals.  */

long
wtimer_granularity (void)
{
#if defined TIMER_GETTIMEOFDAY
  /* The granularity of gettimeofday depends heavily on the
     architecture, but on modern machines it appears to be better
     than 1ms.  */
  return 1;
#elif defined TIMER_TIME
  /* time() counts whole seconds.  */
  return 1000;
#elif defined TIMER_WINDOWS
  /* ? */
  return 1;
#endif
}
1715 \f
/* This should probably be at a better place, but it doesn't really
   fit into html-parse.c.  */

/* Return a malloc-ed copy of string S in which the following
   characters are replaced by numeric or special graphic entities,
   as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  The caller owns the
   returned string.  */
char *
html_quote_string (const char *s)
{
  const char *scan;
  char *res, *out;
  int newlen = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (scan = s; *scan; scan++)
    switch (*scan)
      {
      case '&':                 /* `&amp;' */
      case ' ':                 /* `&#32;' */
        newlen += 5;
        break;
      case '<':                 /* `&lt;' */
      case '>':                 /* `&gt;' */
        newlen += 4;
        break;
      case '\"':                /* `&quot;' */
        newlen += 6;
        break;
      default:
        newlen += 1;
      }

  res = (char *)xmalloc (newlen + 1);

  /* Second pass: copy, substituting entities as we go.  */
  out = res;
  for (scan = s; *scan; scan++)
    {
      const char *entity;

      switch (*scan)
        {
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        case '\"':
          entity = "&quot;";
          break;
        case ' ':
          entity = "&#32;";
          break;
        default:
          entity = NULL;
        }

      if (entity == NULL)
        *out++ = *scan;
      else
        {
          size_t entlen = strlen (entity);
          memcpy (out, entity, entlen);
          out += entlen;
        }
    }
  *out = '\0';
  return res;
}
1790
/* Return the width, in columns, of the terminal attached to stderr.
   Return 0 when the width cannot be determined: the TIOCGWINSZ ioctl
   is not available or fails, or opt.lfilename is set.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize ws;

  /* Query the terminal only when opt.lfilename is not set.  A
     failing ioctl most likely means ENOTTY.  */
  if (opt.lfilename == NULL
      && ioctl (fileno (stderr), TIOCGWINSZ, &ws) >= 0)
    return ws.ws_col;
#endif /* TIOCGWINSZ */

  return 0;
}
1815
#if 0
/* Debugging aid for checking whether an MD5 library works: compute
   the MD5 digest of the zero-terminated string BUF and return it as
   32 hexadecimal characters in a static buffer.  The code is
   disabled; it is kept for reference only.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  static char res[33];
  unsigned char raw[16];
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Each digest byte yields two characters, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      res[2 * i]     = XDIGIT_TO_xchar (raw[i] >> 4);
      res[2 * i + 1] = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  res[32] = '\0';

  return res;
}
#endif