sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 #include "wget.h"
  63 #include "utils.h"
  64 #include "fnmatch.h"
  65 #include "hash.h"
  66
  67 #ifndef errno
  68 extern int errno;
  69 #endif
  70
  71 /* This section implements several wrappers around the basic
  72    allocation routines.  This is done for two reasons: first, so that
  73    the callers of these functions need not consistently check for
  74    errors.  If there is not enough virtual memory for running Wget,
  75    something is seriously wrong, and Wget exits with an appropriate
  76    error message.
  77
  78    The second reason why these are useful is that, if DEBUG_MALLOC is
  79    defined, they also provide a handy (if crude) malloc debugging
  80    interface that checks memory leaks.  */
  81
  82 /* Croak the fatal memory error and bail out with non-zero exit
  83    status.  */
  84 static void
  85 memfatal (const char *what)
  86 {
  87   /* HACK: expose save_log_p from log.c, so we can turn it off in
  88      order to prevent saving the log.  Saving the log is dangerous
  89      because logprintf() and logputs() can call malloc(), so this
  90      could infloop.  When logging is turned off, infloop can no longer
  91      happen.
  92
  93      #### This is no longer really necessary because the new routines
  94      in log.c cons only if the line exceeds eighty characters.  But
  95      this can come at the end of a line, so it's OK to be careful.
  96
  97      On a more serious note, it would be good to have a
  98      log_forced_shutdown() routine that exposes this cleanly.  */
  99   extern int save_log_p;
 100
 101   save_log_p = 0;
 102   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
 103   exit (1);
 104 }
 105
 106 /* These functions end with _real because they need to be
 107    distinguished from the debugging functions, and from the macros.
 108    Explanation follows:
 109
 110    If memory debugging is not turned on, wget.h defines these:
 111
 112      #define xmalloc xmalloc_real
 113      #define xrealloc xrealloc_real
 114      #define xstrdup xstrdup_real
 115      #define xfree free
 116
 117    In case of memory debugging, the definitions are a bit more
 118    complex, because we want to provide more information, *and* we want
 119    to call the debugging code.  (The former is the reason why xmalloc
 120    and friends need to be macros in the first place.)  Then it looks
 121    like this:
 122
 123      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 124      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 125      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 126      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 127
  128    Each of the *_debug functions does its magic and calls the real one.  */
 129
 130 #ifdef DEBUG_MALLOC
 131 # define STATIC_IF_DEBUG static
 132 #else
 133 # define STATIC_IF_DEBUG
 134 #endif
 135
 136 STATIC_IF_DEBUG void *
 137 xmalloc_real (size_t size)
 138 {
 139   void *ptr = malloc (size);
 140   if (!ptr)
 141     memfatal ("malloc");
 142   return ptr;
 143 }
 144
 145 STATIC_IF_DEBUG void *
 146 xrealloc_real (void *ptr, size_t newsize)
 147 {
 148   void *newptr;
 149
 150   /* Not all Un*xes have the feature of realloc() that calling it with
 151      a NULL-pointer is the same as malloc(), but it is easy to
 152      simulate.  */
 153   if (ptr)
 154     newptr = realloc (ptr, newsize);
 155   else
 156     newptr = malloc (newsize);
 157   if (!newptr)
 158     memfatal ("realloc");
 159   return newptr;
 160 }
 161
 162 STATIC_IF_DEBUG char *
 163 xstrdup_real (const char *s)
 164 {
 165   char *copy;
 166
 167 #ifndef HAVE_STRDUP
 168   int l = strlen (s);
 169   copy = malloc (l + 1);
 170   if (!copy)
 171     memfatal ("strdup");
 172   memcpy (copy, s, l + 1);
 173 #else  /* HAVE_STRDUP */
 174   copy = strdup (s);
 175   if (!copy)
 176     memfatal ("strdup");
 177 #endif /* HAVE_STRDUP */
 178
 179   return copy;
 180 }
 181
 182 #ifdef DEBUG_MALLOC
 183
 184 /* Crude home-grown routines for debugging some malloc-related
 185    problems.  Featured:
 186
 187    * Counting the number of malloc and free invocations, and reporting
 188      the "balance", i.e. how many times more malloc was called than it
 189      was the case with free.
 190
 191    * Making malloc store its entry into a simple array and free remove
 192      stuff from that array.  At the end, print the pointers which have
 193      not been freed, along with the source file and the line number.
 194      This also has the side-effect of detecting freeing memory that
 195      was never allocated.
 196
 197    Note that this kind of memory leak checking strongly depends on
 198    every malloc() being followed by a free(), even if the program is
 199    about to finish.  Wget is careful to free the data structure it
 200    allocated in init.c.  */
 201
 202 static int malloc_count, free_count;
 203
 204 static struct {
 205   char *ptr;
 206   const char *file;
 207   int line;
 208 } malloc_debug[100000];
 209
 210 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 211    which can be a real problem.  It would be nice to use a hash table
 212    for malloc_debug, but the functions in hash.c are not suitable
 213    because they can call malloc() themselves.  Maybe it would work if
 214    the hash table were preallocated to a huge size, and if we set the
 215    rehash threshold to 1.0.  */
 216
 217 /* Register PTR in malloc_debug.  Abort if this is not possible
 218    (presumably due to the number of current allocations exceeding the
 219    size of malloc_debug.)  */
 220
 221 static void
 222 register_ptr (void *ptr, const char *file, int line)
 223 {
 224   int i;
 225   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 226     if (malloc_debug[i].ptr == NULL)
 227       {
 228         malloc_debug[i].ptr = ptr;
 229         malloc_debug[i].file = file;
 230         malloc_debug[i].line = line;
 231         return;
 232       }
 233   abort ();
 234 }
 235
 236 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 237    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 238
 239 static void
 240 unregister_ptr (void *ptr)
 241 {
 242   int i;
 243   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 244     if (malloc_debug[i].ptr == ptr)
 245       {
 246         malloc_debug[i].ptr = NULL;
 247         return;
 248       }
 249   abort ();
 250 }
 251
 252 /* Print the malloc debug stats that can be gathered from the above
 253    information.  Currently this is the count of mallocs, frees, the
 254    difference between the two, and the dump of the contents of
 255    malloc_debug.  The last part are the memory leaks.  */
 256
 257 void
 258 print_malloc_debug_stats (void)
 259 {
 260   int i;
 261   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 262           malloc_count, free_count, malloc_count - free_count);
 263   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 264     if (malloc_debug[i].ptr != NULL)
 265       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 266               malloc_debug[i].file, malloc_debug[i].line);
 267 }
 268
 269 void *
 270 xmalloc_debug (size_t size, const char *source_file, int source_line)
 271 {
 272   void *ptr = xmalloc_real (size);
 273   ++malloc_count;
 274   register_ptr (ptr, source_file, source_line);
 275   return ptr;
 276 }
 277
 278 void
 279 xfree_debug (void *ptr, const char *source_file, int source_line)
 280 {
 281   assert (ptr != NULL);
 282   ++free_count;
 283   unregister_ptr (ptr);
 284   free (ptr);
 285 }
 286
 287 void *
 288 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 289 {
 290   void *newptr = xrealloc_real (ptr, newsize);
 291   if (!ptr)
 292     {
 293       ++malloc_count;
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   else if (newptr != ptr)
 297     {
 298       unregister_ptr (ptr);
 299       register_ptr (newptr, source_file, source_line);
 300     }
 301   return newptr;
 302 }
 303
 304 char *
 305 xstrdup_debug (const char *s, const char *source_file, int source_line)
 306 {
 307   char *copy = xstrdup_real (s);
 308   ++malloc_count;
 309   register_ptr (copy, source_file, source_line);
 310   return copy;
 311 }
 312
 313 #endif /* DEBUG_MALLOC */
 314 \f
 315 /* Utility function: like xstrdup(), but also lowercases S.  */
 316
 317 char *
 318 xstrdup_lower (const char *s)
 319 {
 320   char *copy = xstrdup (s);
 321   char *p = copy;
 322   for (; *p; p++)
 323     *p = TOLOWER (*p);
 324   return copy;
 325 }
 326
 327 /* Return a count of how many times CHR occurs in STRING. */
 328
 329 int
 330 count_char (const char *string, char chr)
 331 {
 332   const char *p;
 333   int count = 0;
 334   for (p = string; *p; p++)
 335     if (*p == chr)
 336       ++count;
 337   return count;
 338 }
 339
 340 /* Copy the string formed by two pointers (one on the beginning, other
 341    on the char after the last char) to a new, malloc-ed location.
 342    0-terminate it.  */
 343 char *
 344 strdupdelim (const char *beg, const char *end)
 345 {
 346   char *res = (char *)xmalloc (end - beg + 1);
 347   memcpy (res, beg, end - beg);
 348   res[end - beg] = '\0';
 349   return res;
 350 }
 351
 352 /* Parse a string containing comma-separated elements, and return a
 353    vector of char pointers with the elements.  Spaces following the
 354    commas are ignored.  */
 355 char **
 356 sepstring (const char *s)
 357 {
 358   char **res;
 359   const char *p;
 360   int i = 0;
 361
 362   if (!s || !*s)
 363     return NULL;
 364   res = NULL;
 365   p = s;
 366   while (*s)
 367     {
 368       if (*s == ',')
 369         {
 370           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 371           res[i] = strdupdelim (p, s);
 372           res[++i] = NULL;
 373           ++s;
 374           /* Skip the blanks following the ','.  */
 375           while (ISSPACE (*s))
 376             ++s;
 377           p = s;
 378         }
 379       else
 380         ++s;
 381     }
 382   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 383   res[i] = strdupdelim (p, s);
 384   res[i + 1] = NULL;
 385   return res;
 386 }
 387 \f
 388 /* Return pointer to a static char[] buffer in which zero-terminated
 389    string-representation of TM (in form hh:mm:ss) is printed.
 390
 391    If TM is non-NULL, the current time-in-seconds will be stored
 392    there.
 393
 394    (#### This is misleading: one would expect TM would be used instead
 395    of the current time in that case.  This design was probably
 396    influenced by the design time(2), and should be changed at some
 397    points.  No callers use non-NULL TM anyway.)  */
 398
 399 char *
 400 time_str (time_t *tm)
 401 {
 402   static char output[15];
 403   struct tm *ptm;
 404   time_t secs = time (tm);
 405
 406   if (secs == -1)
 407     {
 408       /* In case of error, return the empty string.  Maybe we should
 409          just abort if this happens?  */
 410       *output = '\0';
 411       return output;
 412     }
 413   ptm = localtime (&secs);
 414   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 415   return output;
 416 }
 417
 418 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 419
 420 char *
 421 datetime_str (time_t *tm)
 422 {
 423   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 424   struct tm *ptm;
 425   time_t secs = time (tm);
 426
 427   if (secs == -1)
 428     {
 429       /* In case of error, return the empty string.  Maybe we should
 430          just abort if this happens?  */
 431       *output = '\0';
 432       return output;
 433     }
 434   ptm = localtime (&secs);
 435   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 436            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 437            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 438   return output;
 439 }
 440 \f
 441 /* The Windows versions of the following two functions are defined in
 442    mswindows.c.  */
 443
 444 #ifndef WINDOWS
 445 void
 446 fork_to_background (void)
 447 {
 448   pid_t pid;
 449   /* Whether we arrange our own version of opt.lfilename here.  */
 450   int changedp = 0;
 451
 452   if (!opt.lfilename)
 453     {
 454       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 455       changedp = 1;
 456     }
 457   pid = fork ();
 458   if (pid < 0)
 459     {
 460       /* parent, error */
 461       perror ("fork");
 462       exit (1);
 463     }
 464   else if (pid != 0)
 465     {
 466       /* parent, no error */
 467       printf (_("Continuing in background.\n"));
 468       if (changedp)
 469         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 470       exit (0);
 471     }
 472   /* child: keep running */
 473 }
 474 #endif /* not WINDOWS */
 475 \f
 476 #if 0
 477 /* debug */
 478 char *
 479 ps (char *orig)
 480 {
 481   char *r = xstrdup (orig);
 482   path_simplify (r);
 483   return r;
 484 }
 485 #endif
 486
 487 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 488    in that:
 489         Multple `/'s are collapsed to a single `/'.
 490         Leading `./'s and trailing `/.'s are removed.
 491         Trailing `/'s are removed.
 492         Non-leading `../'s and trailing `..'s are handled by removing
 493         portions of the path.
 494
 495    E.g. "a/b/c/./../d/.." will yield "a/b/".  This function originates
 496    from GNU Bash and has been mutilated to unrecognition for use in
 497    Wget.
 498
 499    Changes for Wget:
 500         Always use '/' as stub_char.
 501         Don't check for local things using canon_stat.
 502         Change the original string instead of strdup-ing.
 503         React correctly when beginning with `./' and `../'.
 504         Don't zip out trailing slashes.
 505         Return a value indicating whether any modifications took place.
 506
 507    If you dare change this function, take a careful look at the test
 508    cases below, and make sure that they pass.  */
 509
 510 int
 511 path_simplify (char *path)
 512 {
 513   register int i, start;
 514   int changes = 0;
 515   char stub_char;
 516
 517   if (!*path)
 518     return 0;
 519
 520   stub_char = '/';
 521
 522   if (path[0] == '/')
 523     /* Preserve initial '/'. */
 524     ++path;
 525
 526   /* Nix out leading `.' or `..' with.  */
 527   if ((path[0] == '.' && path[1] == '\0')
 528       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 529     {
 530       path[0] = '\0';
 531       changes = 1;
 532       return changes;
 533     }
 534
 535   /* Walk along PATH looking for things to compact.  */
 536   i = 0;
 537   while (1)
 538     {
 539       if (!path[i])
 540         break;
 541
 542       while (path[i] && path[i] != '/')
 543         i++;
 544
 545       start = i++;
 546
 547       /* If we didn't find any slashes, then there is nothing left to do.  */
 548       if (!path[start])
 549         break;
 550
 551       /* Handle multiple `/'s in a row.  */
 552       while (path[i] == '/')
 553         i++;
 554
 555       if ((start + 1) != i)
 556         {
 557           strcpy (path + start + 1, path + i);
 558           i = start + 1;
 559           changes = 1;
 560         }
 561
 562       /* Check for `../', `./' or trailing `.' by itself.  */
 563       if (path[i] == '.')
 564         {
 565           /* Handle trailing `.' by itself.  */
 566           if (!path[i + 1])
 567             {
 568               path[--i] = '\0';
 569               changes = 1;
 570               break;
 571             }
 572
 573           /* Handle `./'.  */
 574           if (path[i + 1] == '/')
 575             {
 576               strcpy (path + i, path + i + 1);
 577               i = (start < 0) ? 0 : start;
 578               changes = 1;
 579               continue;
 580             }
 581
 582           /* Handle `../' or trailing `..' by itself.  */
 583           if (path[i + 1] == '.' &&
 584               (path[i + 2] == '/' || !path[i + 2]))
 585             {
 586               while (--start > -1 && path[start] != '/');
 587               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 588               i = (start < 0) ? 0 : start;
 589               changes = 1;
 590               continue;
 591             }
 592         }       /* path == '.' */
 593     } /* while */
 594
 595   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 596   i = 0;
 597   while (1)
 598     {
 599       if (path[i] == '.' && path[i + 1] == '/')
 600         i += 2;
 601       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 602         i += 3;
 603       else
 604         break;
 605     }
 606   if (i)
 607     {
 608       strcpy (path, path + i - 0);
 609       changes = 1;
 610     }
 611
 612   return changes;
 613 }
 614
 615 /* Test cases:
 616    ps("")                   -> ""
 617    ps("/")                  -> "/"
 618    ps(".")                  -> ""
 619    ps("..")                 -> ""
 620    ps("/.")                 -> "/"
 621    ps("/..")                -> "/"
 622    ps("foo")                -> "foo"
 623    ps("foo/bar")            -> "foo/bar"
 624    ps("foo//bar")           -> "foo/bar"             (possibly a bug)
 625    ps("foo/../bar")         -> "bar"
 626    ps("foo/bar/..")         -> "foo/"
 627    ps("foo/bar/../x")       -> "foo/x"
 628    ps("foo/bar/../x/")      -> "foo/x/"
 629    ps("foo/..")             -> ""
 630    ps("/foo/..")            -> "/"
 631    ps("a/b/../../c")        -> "c"
 632    ps("/a/b/../../c")       -> "/c"
 633    ps("./a/../b")           -> "b"
 634    ps("/./a/../b")          -> "/b"
 635 */
 636 \f
 637 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 638    specified with TM.  */
 639 void
 640 touch (const char *file, time_t tm)
 641 {
 642 #ifdef HAVE_STRUCT_UTIMBUF
 643   struct utimbuf times;
 644   times.actime = times.modtime = tm;
 645 #else
 646   time_t times[2];
 647   times[0] = times[1] = tm;
 648 #endif
 649
 650   if (utime (file, &times) == -1)
 651     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 652 }
 653
 654 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 655    nothing under MS-Windows.  */
 656 int
 657 remove_link (const char *file)
 658 {
 659   int err = 0;
 660   struct stat st;
 661
 662   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 663     {
 664       DEBUGP (("Unlinking %s (symlink).\n", file));
 665       err = unlink (file);
 666       if (err != 0)
 667         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 668                    file, strerror (errno));
 669     }
 670   return err;
 671 }
 672
 673 /* Does FILENAME exist?  This is quite a lousy implementation, since
 674    it supplies no error codes -- only a yes-or-no answer.  Thus it
 675    will return that a file does not exist if, e.g., the directory is
 676    unreadable.  I don't mind it too much currently, though.  The
 677    proper way should, of course, be to have a third, error state,
 678    other than true/false, but that would introduce uncalled-for
 679    additional complexity to the callers.  */
 680 int
 681 file_exists_p (const char *filename)
 682 {
 683 #ifdef HAVE_ACCESS
 684   return access (filename, F_OK) >= 0;
 685 #else
 686   struct stat buf;
 687   return stat (filename, &buf) >= 0;
 688 #endif
 689 }
 690
 691 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 692    Returns 0 on error.  */
 693 int
 694 file_non_directory_p (const char *path)
 695 {
 696   struct stat buf;
 697   /* Use lstat() rather than stat() so that symbolic links pointing to
 698      directories can be identified correctly.  */
 699   if (lstat (path, &buf) != 0)
 700     return 0;
 701   return S_ISDIR (buf.st_mode) ? 0 : 1;
 702 }
 703
 704 /* Return a unique filename, given a prefix and count */
 705 static char *
 706 unique_name_1 (const char *fileprefix, int count)
 707 {
 708   char *filename;
 709
 710   if (count)
 711     {
 712       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 713       sprintf (filename, "%s.%d", fileprefix, count);
 714     }
 715   else
 716     filename = xstrdup (fileprefix);
 717
 718   if (!file_exists_p (filename))
 719     return filename;
 720   else
 721     {
 722       xfree (filename);
 723       return NULL;
 724     }
 725 }
 726
 727 /* Return a unique file name, based on PREFIX.  */
 728 char *
 729 unique_name (const char *prefix)
 730 {
 731   char *file = NULL;
 732   int count = 0;
 733
 734   while (!file)
 735     file = unique_name_1 (prefix, count++);
 736   return file;
 737 }
 738 \f
 739 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 740    are missing, create them first.  In case any mkdir() call fails,
 741    return its error status.  Returns 0 on successful completion.
 742
 743    The behaviour of this function should be identical to the behaviour
 744    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 745 int
 746 make_directory (const char *directory)
 747 {
 748   int quit = 0;
 749   int i;
 750   char *dir;
 751
 752   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 753      function is unsafe if called with a read-only char *argument.  */
 754   STRDUP_ALLOCA (dir, directory);
 755
 756   /* If the first character of dir is '/', skip it (and thus enable
 757      creation of absolute-pathname directories.  */
 758   for (i = (*dir == '/'); 1; ++i)
 759     {
 760       for (; dir[i] && dir[i] != '/'; i++)
 761         ;
 762       if (!dir[i])
 763         quit = 1;
 764       dir[i] = '\0';
 765       /* Check whether the directory already exists.  */
 766       if (!file_exists_p (dir))
 767         {
 768           if (mkdir (dir, 0777) < 0)
 769             return -1;
 770         }
 771       if (quit)
 772         break;
 773       else
 774         dir[i] = '/';
 775     }
 776   return 0;
 777 }
 778
 779 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 780    should be a file name.
 781
 782    file_merge("/foo/bar", "baz")  => "/foo/baz"
 783    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 784    file_merge("foo", "bar")       => "bar"
 785
 786    In other words, it's a simpler and gentler version of uri_merge_1.  */
 787
 788 char *
 789 file_merge (const char *base, const char *file)
 790 {
 791   char *result;
 792   const char *cut = (const char *)strrchr (base, '/');
 793
 794   if (!cut)
 795     return xstrdup (file);
 796
 797   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 798   memcpy (result, base, cut - base);
 799   result[cut - base] = '/';
 800   strcpy (result + (cut - base) + 1, file);
 801
 802   return result;
 803 }
 804 \f
 805 static int in_acclist PARAMS ((const char *const *, const char *, int));
 806
 807 /* Determine whether a file is acceptable to be followed, according to
 808    lists of patterns to accept/reject.  */
 809 int
 810 acceptable (const char *s)
 811 {
 812   int l = strlen (s);
 813
 814   while (l && s[l] != '/')
 815     --l;
 816   if (s[l] == '/')
 817     s += (l + 1);
 818   if (opt.accepts)
 819     {
 820       if (opt.rejects)
 821         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 822                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 823       else
 824         return in_acclist ((const char *const *)opt.accepts, s, 1);
 825     }
 826   else if (opt.rejects)
 827     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 828   return 1;
 829 }
 830
 831 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 832    `/something', frontcmp() will return 1 only if S2 begins with
 833    `/something'.  Otherwise, 0 is returned.  */
 834 int
 835 frontcmp (const char *s1, const char *s2)
 836 {
 837   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 838   return !*s1;
 839 }
 840
 841 /* Iterate through STRLIST, and return the first element that matches
 842    S, through wildcards or front comparison (as appropriate).  */
 843 static char *
 844 proclist (char **strlist, const char *s, enum accd flags)
 845 {
 846   char **x;
 847
 848   for (x = strlist; *x; x++)
 849     if (has_wildcards_p (*x))
 850       {
 851         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 852           break;
 853       }
 854     else
 855       {
 856         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 857         if (frontcmp (p, s))
 858           break;
 859       }
 860   return *x;
 861 }
 862
 863 /* Returns whether DIRECTORY is acceptable for download, wrt the
 864    include/exclude lists.
 865
 866    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 867    and absolute paths may be freely intermixed.  */
 868 int
 869 accdir (const char *directory, enum accd flags)
 870 {
 871   /* Remove starting '/'.  */
 872   if (flags & ALLABS && *directory == '/')
 873     ++directory;
 874   if (opt.includes)
 875     {
 876       if (!proclist (opt.includes, directory, flags))
 877         return 0;
 878     }
 879   if (opt.excludes)
 880     {
 881       if (proclist (opt.excludes, directory, flags))
 882         return 0;
 883     }
 884   return 1;
 885 }
 886
 887 /* Match the end of STRING against PATTERN.  For instance:
 888
 889    match_backwards ("abc", "bc") -> 1
 890    match_backwards ("abc", "ab") -> 0
 891    match_backwards ("abc", "abc") -> 1 */
 892 int
 893 match_tail (const char *string, const char *pattern)
 894 {
 895   int i, j;
 896
 897   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 898     if (string[i] != pattern[j])
 899       break;
 900   /* If the pattern was exhausted, the match was succesful.  */
 901   if (j == -1)
 902     return 1;
 903   else
 904     return 0;
 905 }
 906
 907 /* Checks whether string S matches each element of ACCEPTS.  A list
 908    element are matched either with fnmatch() or match_tail(),
 909    according to whether the element contains wildcards or not.
 910
 911    If the BACKWARD is 0, don't do backward comparison -- just compare
 912    them normally.  */
 913 static int
 914 in_acclist (const char *const *accepts, const char *s, int backward)
 915 {
 916   for (; *accepts; accepts++)
 917     {
 918       if (has_wildcards_p (*accepts))
 919         {
 920           /* fnmatch returns 0 if the pattern *does* match the
 921              string.  */
 922           if (fnmatch (*accepts, s, 0) == 0)
 923             return 1;
 924         }
 925       else
 926         {
 927           if (backward)
 928             {
 929               if (match_tail (s, *accepts))
 930                 return 1;
 931             }
 932           else
 933             {
 934               if (!strcmp (s, *accepts))
 935                 return 1;
 936             }
 937         }
 938     }
 939   return 0;
 940 }
 941
 942 /* Return the location of STR's suffix (file extension).  Examples:
 943    suffix ("foo.bar")       -> "bar"
 944    suffix ("foo.bar.baz")   -> "baz"
 945    suffix ("/foo/bar")      -> NULL
 946    suffix ("/foo.bar/baz")  -> NULL  */
 947 char *
 948 suffix (const char *str)
 949 {
 950   int i;
 951
 952   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 953     ;
 954
 955   if (str[i++] == '.')
 956     return (char *)str + i;
 957   else
 958     return NULL;
 959 }
 960
 961 /* Read a line from FP.  The function reallocs the storage as needed
 962    to accomodate for any length of the line.  Reallocs are done
 963    exponentially, doubling the storage after each overflow to minimize
 964    the number of calls to realloc() and fgets().  The newline
 965    character at the end of line is retained.
 966
 967    After end-of-file is encountered without anything being read, NULL
 968    is returned.  NULL is also returned on error.  To distinguish
 969    between these two cases, use the stdio function ferror().
 970
 971    A future version of this function will be rewritten to use fread()
 972    instead of fgets(), and to return the length of the line, which
 973    will make the function usable on files with binary content.  */
 974
 975 char *
 976 read_whole_line (FILE *fp)
 977 {
 978   int length = 0;
 979   int bufsize = 81;
 980   char *line = (char *)xmalloc (bufsize);
 981
 982   while (fgets (line + length, bufsize - length, fp))
 983     {
 984       length += strlen (line + length);
 985       if (length == 0)
 986         /* Possible for example when reading from a binary file where
 987            a line begins with \0.  */
 988         continue;
 989
 990       if (line[length - 1] == '\n')
 991         break;
 992
 993       /* fgets() guarantees to read the whole line, or to use up the
 994          space we've given it.  We can double the buffer
 995          unconditionally.  */
 996       bufsize <<= 1;
 997       line = xrealloc (line, bufsize);
 998     }
 999   if (length == 0 || ferror (fp))
1000     {
1001       xfree (line);
1002       return NULL;
1003     }
1004   if (length + 1 < bufsize)
1005     /* Relieve the memory from our exponential greediness.  We say
1006        `length + 1' because the terminating \0 is not included in
1007        LENGTH.  We don't need to zero-terminate the string ourselves,
1008        though, because fgets() does that.  */
1009     line = xrealloc (line, length + 1);
1010   return line;
1011 }
1012 \f
 1013 /* Read FILE into memory.  A pointer to `struct file_memory' is
1014    returned; use struct element `content' to access file contents, and
1015    the element `length' to know the file length.  `content' is *not*
1016    zero-terminated, and you should *not* read or write beyond the [0,
1017    length) range of characters.
1018
1019    After you are done with the file contents, call read_file_free to
1020    release the memory.
1021
1022    Depending on the operating system and the type of file that is
1023    being read, read_file() either mmap's the file into memory, or
1024    reads the file into the core using read().
1025
1026    If file is named "-", fileno(stdin) is used for reading instead.
1027    If you want to read from a real file named "-", use "./-" instead.  */
1028
1029 struct file_memory *
1030 read_file (const char *file)
1031 {
1032   int fd;
1033   struct file_memory *fm;
1034   long size;
1035   int inhibit_close = 0;
1036
1037   /* Some magic in the finest tradition of Perl and its kin: if FILE
1038      is "-", just use stdin.  */
1039   if (HYPHENP (file))
1040     {
1041       fd = fileno (stdin);
1042       inhibit_close = 1;
1043       /* Note that we don't inhibit mmap() in this case.  If stdin is
1044          redirected from a regular file, mmap() will still work.  */
1045     }
1046   else
1047     fd = open (file, O_RDONLY);
1048   if (fd < 0)
1049     return NULL;
1050   fm = xmalloc (sizeof (struct file_memory));
1051
1052 #ifdef HAVE_MMAP
1053   {
1054     struct stat buf;
1055     if (fstat (fd, &buf) < 0)
1056       goto mmap_lose;
1057     fm->length = buf.st_size;
1058     /* NOTE: As far as I know, the callers of this function never
1059        modify the file text.  Relying on this would enable us to
1060        specify PROT_READ and MAP_SHARED for a marginal gain in
1061        efficiency, but at some cost to generality.  */
1062     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1063                         MAP_PRIVATE, fd, 0);
1064     if (fm->content == (char *)MAP_FAILED)
1065       goto mmap_lose;
1066     if (!inhibit_close)
1067       close (fd);
1068
1069     fm->mmap_p = 1;
1070     return fm;
1071   }
1072
1073  mmap_lose:
1074   /* The most common reason why mmap() fails is that FD does not point
1075      to a plain file.  However, it's also possible that mmap() doesn't
1076      work for a particular type of file.  Therefore, whenever mmap()
1077      fails, we just fall back to the regular method.  */
1078 #endif /* HAVE_MMAP */
1079
1080   fm->length = 0;
1081   size = 512;                   /* number of bytes fm->contents can
1082                                    hold at any given time. */
1083   fm->content = xmalloc (size);
1084   while (1)
1085     {
1086       long nread;
1087       if (fm->length > size / 2)
1088         {
1089           /* #### I'm not sure whether the whole exponential-growth
1090              thing makes sense with kernel read.  On Linux at least,
1091              read() refuses to read more than 4K from a file at a
1092              single chunk anyway.  But other Unixes might optimize it
1093              better, and it doesn't *hurt* anything, so I'm leaving
1094              it.  */
1095
1096           /* Normally, we grow SIZE exponentially to make the number
1097              of calls to read() and realloc() logarithmic in relation
1098              to file size.  However, read() can read an amount of data
1099              smaller than requested, and it would be unreasonably to
1100              double SIZE every time *something* was read.  Therefore,
1101              we double SIZE only when the length exceeds half of the
1102              entire allocated size.  */
1103           size <<= 1;
1104           fm->content = xrealloc (fm->content, size);
1105         }
1106       nread = read (fd, fm->content + fm->length, size - fm->length);
1107       if (nread > 0)
1108         /* Successful read. */
1109         fm->length += nread;
1110       else if (nread < 0)
1111         /* Error. */
1112         goto lose;
1113       else
1114         /* EOF */
1115         break;
1116     }
1117   if (!inhibit_close)
1118     close (fd);
1119   if (size > fm->length && fm->length != 0)
1120     /* Due to exponential growth of fm->content, the allocated region
1121        might be much larger than what is actually needed.  */
1122     fm->content = xrealloc (fm->content, fm->length);
1123   fm->mmap_p = 0;
1124   return fm;
1125
1126  lose:
1127   if (!inhibit_close)
1128     close (fd);
1129   xfree (fm->content);
1130   xfree (fm);
1131   return NULL;
1132 }
1133
1134 /* Release the resources held by FM.  Specifically, this calls
1135    munmap() or xfree() on fm->content, depending whether mmap or
1136    malloc/read were used to read in the file.  It also frees the
1137    memory needed to hold the FM structure itself.  */
1138
1139 void
1140 read_file_free (struct file_memory *fm)
1141 {
1142 #ifdef HAVE_MMAP
1143   if (fm->mmap_p)
1144     {
1145       munmap (fm->content, fm->length);
1146     }
1147   else
1148 #endif
1149     {
1150       xfree (fm->content);
1151     }
1152   xfree (fm);
1153 }
1154 \f
/* Release a NULL-terminated vector of pointers: first every element,
   then the vector itself.  A NULL VEC is silently ignored.  */
void
free_vec (char **vec)
{
  char **cur;

  if (!vec)
    return;
  for (cur = vec; *cur; cur++)
    xfree (*cur);
  xfree (vec);
}
1168
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the result.

   V1 is reallocated and the V2 array is freed, so neither pointer
   may be used after the call; use the return value instead.  The
   elements V2 pointed to are not copied or freed -- they simply
   become elements of the returned vector.

   If V1 is NULL, V2 is returned unchanged; if V2 is NULL, V1 is
   returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to copy, just dispose of it.  */
      xfree (v2);
      return v1;
    }
  /* Count the elements of v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count the elements of v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Make room for both sets of elements plus the terminating NULL.
     The elements are `char *', so that is the size to multiply by.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy v2's elements, including its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1197
1198 /* A set of simple-minded routines to store strings in a linked list.
1199    This used to also be used for searching, but now we have hash
1200    tables for that.  */
1201
1202 /* It's a shame that these simple things like linked lists and hash
1203    tables (see hash.c) need to be implemented over and over again.  It
1204    would be nice to be able to use the routines from glib -- see
1205    www.gtk.org for details.  However, that would make Wget depend on
1206    glib, and I want to avoid dependencies to external libraries for
1207    reasons of convenience and portability (I suspect Wget is more
1208    portable than anything ever written for Gnome).  */
1209
1210 /* Append an element to the list.  If the list has a huge number of
1211    elements, this can get slow because it has to find the list's
1212    ending.  If you think you have to call slist_append in a loop,
1213    think about calling slist_prepend() followed by slist_nreverse().  */
1214
1215 slist *
1216 slist_append (slist *l, const char *s)
1217 {
1218   slist *newel = (slist *)xmalloc (sizeof (slist));
1219   slist *beg = l;
1220
1221   newel->string = xstrdup (s);
1222   newel->next = NULL;
1223
1224   if (!l)
1225     return newel;
1226   /* Find the last element.  */
1227   while (l->next)
1228     l = l->next;
1229   l->next = newel;
1230   return beg;
1231 }
1232
1233 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1234
1235 slist *
1236 slist_prepend (slist *l, const char *s)
1237 {
1238   slist *newel = (slist *)xmalloc (sizeof (slist));
1239   newel->string = xstrdup (s);
1240   newel->next = l;
1241   return newel;
1242 }
1243
1244 /* Destructively reverse L. */
1245
1246 slist *
1247 slist_nreverse (slist *l)
1248 {
1249   slist *prev = NULL;
1250   while (l)
1251     {
1252       slist *next = l->next;
1253       l->next = prev;
1254       prev = l;
1255       l = next;
1256     }
1257   return prev;
1258 }
1259
1260 /* Is there a specific entry in the list?  */
1261 int
1262 slist_contains (slist *l, const char *s)
1263 {
1264   for (; l; l = l->next)
1265     if (!strcmp (l->string, s))
1266       return 1;
1267   return 0;
1268 }
1269
1270 /* Free the whole slist.  */
1271 void
1272 slist_free (slist *l)
1273 {
1274   while (l)
1275     {
1276       slist *n = l->next;
1277       xfree (l->string);
1278       xfree (l);
1279       l = n;
1280     }
1281 }
1282 \f
1283 /* Sometimes it's useful to create "sets" of strings, i.e. special
1284    hash tables where you want to store strings as keys and merely
1285    query for their existence.  Here is a set of utility routines that
1286    makes that transparent.  */
1287
/* Add S to the string set HT.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not in the set yet.  That way we never
     have to free() an old key and strdup() a replacement.

     The value stored is always the literal "1": it is a clear
     arbitrary marker and costs no memory, since every entry in every
     `set' hash table shares the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1303
/* Test whether S is a member of the string set HT.  This simply
   forwards to hash_table_contains() and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1311
/* Mapper used by string_set_free(): free the KEY of one entry.  The
   value and the extra argument are not used.  Always returns 0.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);

  return 0;
}
1318
1319 void
1320 string_set_free (struct hash_table *ht)
1321 {
1322   hash_table_map (ht, string_set_free_mapper, NULL);
1323   hash_table_destroy (ht);
1324 }
1325
/* Mapper used by free_keys_and_values(): free both the KEY and the
   VALUE of one entry.  The extra argument is not used.  Always
   returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);

  return 0;
}
1333
1334 /* Another utility function: call free() on all keys and values of HT.  */
1335
1336 void
1337 free_keys_and_values (struct hash_table *ht)
1338 {
1339   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1340 }
1341
1342 \f
/* Worker shared by legible and legible_very_long.  REPR is the
   decimal representation of a number, optionally preceded by `-';
   the result is the same text with a `,' inserted between each group
   of three digits, counting from the right.

   The result lives in a static buffer that is overwritten by the
   next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *src = repr;
  int remaining;

  /* Carry a leading minus sign over unchanged.  */
  if (*src == '-')
    *dst++ = *src++;

  /* Copy the digits one by one.  Whenever the number of digits still
     to be copied is a non-zero multiple of three, a separator is
     due.  */
  remaining = strlen (src);
  while (remaining > 0)
    {
      *dst++ = *src++;
      --remaining;
      if (remaining > 0 && remaining % 3 == 0)
        *dst++ = ',';
    }

  /* Zero-terminate the string.  */
  *dst = '\0';
  return outbuf;
}
1380
/* Return L printed legibly, i.e. with thousands separators.  The
   result is whatever legible_1 returns: a pointer to its static
   buffer.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert to plain decimal first, then let legible_1 add the
     separators.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1390
1391 /* Write a string representation of NUMBER into the provided buffer.
1392    We cannot use sprintf() because we cannot be sure whether the
1393    platform supports printing of what we chose for VERY_LONG_TYPE.
1394
1395    Example: Gcc supports `long long' under many platforms, but on many
1396    of those the native libc knows nothing of it and therefore cannot
1397    print it.
1398
1399    How long BUFFER needs to be depends on the platform and the content
1400    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1401    bytes are sufficient.  Using more might be a good idea.
1402
1403    This function does not go through the hoops that long_to_string
1404    goes to because it doesn't aspire to be fast.  (It's called perhaps
1405    once in a Wget run.)  */
1406
1407 static void
1408 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1409 {
1410   int i = 0;
1411   int j;
1412
1413   /* Print the number backwards... */
1414   do
1415     {
1416       buffer[i++] = '0' + number % 10;
1417       number /= 10;
1418     }
1419   while (number);
1420
1421   /* ...and reverse the order of the digits. */
1422   for (j = 0; j < i / 2; j++)
1423     {
1424       char c = buffer[j];
1425       buffer[j] = buffer[i - 1 - j];
1426       buffer[i - 1 - j] = c;
1427     }
1428   buffer[i] = '\0';
1429 }
1430
1431 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1432 char *
1433 legible_very_long (VERY_LONG_TYPE l)
1434 {
1435   char inbuf[128];
1436   /* Print the number into the buffer.  */
1437   very_long_to_string (inbuf, l);
1438   return legible_1 (inbuf);
1439 }
1440
/* Return the number of characters needed to print A in base 10: the
   number of its digits, plus one for the `-' sign if A is negative.
   Zero counts as one digit.  */
int
numdigit (long a)
{
  int res = 1;
  unsigned long magnitude;

  if (a < 0)
    {
      /* Negate in unsigned arithmetic: `-a' on the long itself
         overflows when A is LONG_MIN.  */
      magnitude = -(unsigned long) a;
      ++res;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1455
/* Helper macros for long_to_string.  They operate on the local
   variables `p' (output cursor) and `n' (value still to be printed)
   of that function.  DIGITS_<k> (10^(k-1)) emits exactly K digits.  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is completely equivalent
   to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.

   Every value of `long' is handled, including LONG_MIN: the
   magnitude is computed in unsigned arithmetic, so negating the most
   negative number cannot overflow.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;              /* magnitude of NUMBER */

  if (number < 0)
    {
      *p++ = '-';
      /* Negating after the conversion to unsigned is well-defined
         even for LONG_MIN, whereas `-number' would overflow.  */
      n = -(unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1564 \f
/* Support for timers. */

#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Exactly one of the three constants above gets defined below,
   depending on the OS and on whether gettimeofday() is available.
   Windows uses TIMER_WINDOWS; virtually every modern Unix ends up
   with TIMER_GETTIMEOFDAY; TIMER_TIME is the catch-all for
   non-Windows systems that lack gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else
# define TIMER_TIME
#endif

/* The starting point recorded by wtimer_reset().  What is stored
   depends on the timer implementation selected above.  */
struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  long secs;
  long usecs;
#elif defined (TIMER_TIME)
  time_t secs;
#elif defined (TIMER_WINDOWS)
  ULARGE_INTEGER wintime;
#endif
};
1605
1606 /* Allocate a timer.  It is not legal to do anything with a freshly
1607    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1608
1609 struct wget_timer *
1610 wtimer_allocate (void)
1611 {
1612   struct wget_timer *wt =
1613     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1614   return wt;
1615 }
1616
/* Convenience constructor: allocate a timer with wtimer_allocate(),
   reset it with wtimer_reset(), and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1626
/* Release the memory held by the timer WT.  WT must not be used
   afterwards.  */

void
wtimer_delete (struct wget_timer *wt)
{
  /* A timer is a single allocation, so one xfree is all it takes.  */
  xfree (wt);
}
1635
/* Record the current time in WT as the starting point against which
   wtimer_elapsed() measures elapsed milliseconds.  A timer that has
   already been used may be reset again.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;
  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined (TIMER_TIME)
  wt->secs = time (NULL);
#elif defined (TIMER_WINDOWS)
  FILETIME filetime;
  SYSTEMTIME systime;
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1663
/* Return the number of milliseconds elapsed since the timer was last
   reset.  It is allowed to call this function more than once to get
   increasingly higher elapsed values.

   The branch that is compiled matches the timer implementation
   selected by the TIMER_* constants, the same ones that determine
   the layout of struct wget_timer.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds are scaled up to milliseconds, microseconds are
     scaled down.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* The difference is divided by 10000 to obtain milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1693
/* Return the assumed granularity of the timer implementation in
   use, expressed in the same unit as wtimer_elapsed() (milliseconds).
   Code that has to cope with "zero" time intervals needs to know
   this.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* The granularity of gettimeofday varies hugely between
     architectures, but on modern machines it appears to be better
     than 1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1718 \f
1719 /* This should probably be at a better place, but it doesn't really
1720    fit into html-parse.c.  */
1721
/* Return a newly malloc-ed copy of S in which the following
   characters are replaced by HTML entities (see RFC1866):

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   Every other character is copied unchanged.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *out, *result;
  int size = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':
        size += 5;              /* `&amp;' */
        break;
      case '<':
      case '>':
        size += 4;              /* `&lt;' and `&gt;' */
        break;
      case '\"':
        size += 6;              /* `&quot;' */
        break;
      case ' ':
        size += 5;              /* `&#32;' */
        break;
      default:
        size += 1;
        break;
      }

  result = (char *)xmalloc (size + 1);

  /* Second pass: copy, substituting entities as we go.  */
  out = result;
  for (src = s; *src; src++)
    {
      const char *entity;

      if (*src == '&')
        entity = "&amp;";
      else if (*src == '<')
        entity = "&lt;";
      else if (*src == '>')
        entity = "&gt;";
      else if (*src == '\"')
        entity = "&quot;";
      else if (*src == ' ')
        entity = "&#32;";
      else
        {
          /* An ordinary character.  */
          *out++ = *src;
          continue;
        }

      while (*entity)
        *out++ = *entity++;
    }
  *out = '\0';
  return result;
}
1793
/* Return the width, in columns, of the terminal attached to stderr,
   or 0 if it cannot be determined.  0 is also returned when
   opt.lfilename is set, and on systems without TIOCGWINSZ.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#else  /* not TIOCGWINSZ */
  return 0;
#endif /* not TIOCGWINSZ */
}
1818
#if 0
/* Debugging aid for checking whether an MD5 library works: compute
   the MD5 digest of the zero-terminated string BUF and return it as
   32 hexadecimal digits.  The result lives in a static buffer.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];
  static char res[33];
  char *out = res;
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Each digest byte becomes two hex digits, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      *out++ = XDIGIT_TO_xchar (raw[i] >> 4);
      *out++ = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  *out = '\0';

  return res;
}
#endif