sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53 #ifdef HAVE_SYS_IOCTL_H
  54 # include <sys/ioctl.h>
  55 #endif
  56
  57 #include "wget.h"
  58 #include "utils.h"
  59 #include "fnmatch.h"
  60 #include "hash.h"
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* This section implements several wrappers around the basic
  67    allocation routines.  This is done for two reasons: first, so that
  68    the callers of these functions need not consistently check for
  69    errors.  If there is not enough virtual memory for running Wget,
  70    something is seriously wrong, and Wget exits with an appropriate
  71    error message.
  72
  73    The second reason why these are useful is that, if DEBUG_MALLOC is
  74    defined, they also provide a handy (if crude) malloc debugging
  75    interface that checks memory leaks.  */
  76
  77 /* Croak the fatal memory error and bail out with non-zero exit
  78    status.  */
  79 static void
  80 memfatal (const char *what)
  81 {
  82   /* HACK: expose save_log_p from log.c, so we can turn it off in
  83      order to prevent saving the log.  Saving the log is dangerous
  84      because logprintf() and logputs() can call malloc(), so this
  85      could infloop.  When logging is turned off, infloop can no longer
  86      happen.
  87
  88      #### This is no longer really necessary because the new routines
  89      in log.c cons only if the line exceeds eighty characters.  But
  90      this can come at the end of a line, so it's OK to be careful.
  91
  92      On a more serious note, it would be good to have a
  93      log_forced_shutdown() routine that exposes this cleanly.  */
  94   extern int save_log_p;
  95
  96   save_log_p = 0;
  97   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  98   exit (1);
  99 }
 100
 101 /* These functions end with _real because they need to be
 102    distinguished from the debugging functions, and from the macros.
 103    Explanation follows:
 104
 105    If memory debugging is not turned on, wget.h defines these:
 106
 107      #define xmalloc xmalloc_real
 108      #define xrealloc xrealloc_real
 109      #define xstrdup xstrdup_real
 110      #define xfree free
 111
 112    In case of memory debugging, the definitions are a bit more
 113    complex, because we want to provide more information, *and* we want
 114    to call the debugging code.  (The former is the reason why xmalloc
 115    and friends need to be macros in the first place.)  Then it looks
 116    like this:
 117
 118      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 119      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 120      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 121      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 122
 123    Each of the *_debug functions does its magic and calls the real one.  */
 124
 125 #ifdef DEBUG_MALLOC
 126 # define STATIC_IF_DEBUG static
 127 #else
 128 # define STATIC_IF_DEBUG
 129 #endif
 130
 131 STATIC_IF_DEBUG void *
 132 xmalloc_real (size_t size)
 133 {
 134   void *ptr = malloc (size);
 135   if (!ptr)
 136     memfatal ("malloc");
 137   return ptr;
 138 }
 139
 140 STATIC_IF_DEBUG void *
 141 xrealloc_real (void *ptr, size_t newsize)
 142 {
 143   void *newptr;
 144
 145   /* Not all Un*xes have the feature of realloc() that calling it with
 146      a NULL-pointer is the same as malloc(), but it is easy to
 147      simulate.  */
 148   if (ptr)
 149     newptr = realloc (ptr, newsize);
 150   else
 151     newptr = malloc (newsize);
 152   if (!newptr)
 153     memfatal ("realloc");
 154   return newptr;
 155 }
 156
 157 STATIC_IF_DEBUG char *
 158 xstrdup_real (const char *s)
 159 {
 160   char *copy;
 161
 162 #ifndef HAVE_STRDUP
 163   int l = strlen (s);
 164   copy = malloc (l + 1);
 165   if (!copy)
 166     memfatal ("strdup");
 167   memcpy (copy, s, l + 1);
 168 #else  /* HAVE_STRDUP */
 169   copy = strdup (s);
 170   if (!copy)
 171     memfatal ("strdup");
 172 #endif /* HAVE_STRDUP */
 173
 174   return copy;
 175 }
 176
 177 #ifdef DEBUG_MALLOC
 178
 179 /* Crude home-grown routines for debugging some malloc-related
 180    problems.  Featured:
 181
 182    * Counting the number of malloc and free invocations, and reporting
 183      the "balance", i.e. how many times more malloc was called than it
 184      was the case with free.
 185
 186    * Making malloc store its entry into a simple array and free remove
 187      stuff from that array.  At the end, print the pointers which have
 188      not been freed, along with the source file and the line number.
 189      This also has the side-effect of detecting freeing memory that
 190      was never allocated.
 191
 192    Note that this kind of memory leak checking strongly depends on
 193    every malloc() being followed by a free(), even if the program is
 194    about to finish.  Wget is careful to free the data structure it
 195    allocated in init.c.  */
 196
/* Counters bumped by the *_debug allocation and free wrappers.  */
static int malloc_count, free_count;

/* Table of registered allocations.  A slot whose `ptr' is NULL is
   unused.  */
static struct {
  char *ptr;                    /* registered address, or NULL */
  const char *file;             /* source file given at registration */
  int line;                     /* source line given at registration */
} malloc_debug[100000];
 204
 205 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 206    which can be a real problem.  It would be nice to use a hash table
 207    for malloc_debug, but the functions in hash.c are not suitable
 208    because they can call malloc() themselves.  Maybe it would work if
 209    the hash table were preallocated to a huge size, and if we set the
 210    rehash threshold to 1.0.  */
 211
 212 /* Register PTR in malloc_debug.  Abort if this is not possible
 213    (presumably due to the number of current allocations exceeding the
 214    size of malloc_debug.)  */
 215
 216 static void
 217 register_ptr (void *ptr, const char *file, int line)
 218 {
 219   int i;
 220   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 221     if (malloc_debug[i].ptr == NULL)
 222       {
 223         malloc_debug[i].ptr = ptr;
 224         malloc_debug[i].file = file;
 225         malloc_debug[i].line = line;
 226         return;
 227       }
 228   abort ();
 229 }
 230
 231 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 232    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 233
 234 static void
 235 unregister_ptr (void *ptr)
 236 {
 237   int i;
 238   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 239     if (malloc_debug[i].ptr == ptr)
 240       {
 241         malloc_debug[i].ptr = NULL;
 242         return;
 243       }
 244   abort ();
 245 }
 246
 247 /* Print the malloc debug stats that can be gathered from the above
 248    information.  Currently this is the count of mallocs, frees, the
 249    difference between the two, and the dump of the contents of
 250    malloc_debug.  The last part are the memory leaks.  */
 251
 252 void
 253 print_malloc_debug_stats (void)
 254 {
 255   int i;
 256   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 257           malloc_count, free_count, malloc_count - free_count);
 258   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 259     if (malloc_debug[i].ptr != NULL)
 260       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 261               malloc_debug[i].file, malloc_debug[i].line);
 262 }
 263
 264 void *
 265 xmalloc_debug (size_t size, const char *source_file, int source_line)
 266 {
 267   void *ptr = xmalloc_real (size);
 268   ++malloc_count;
 269   register_ptr (ptr, source_file, source_line);
 270   return ptr;
 271 }
 272
 273 void
 274 xfree_debug (void *ptr, const char *source_file, int source_line)
 275 {
 276   assert (ptr != NULL);
 277   ++free_count;
 278   unregister_ptr (ptr);
 279   free (ptr);
 280 }
 281
 282 void *
 283 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 284 {
 285   void *newptr = xrealloc_real (ptr, newsize);
 286   if (!ptr)
 287     {
 288       ++malloc_count;
 289       register_ptr (newptr, source_file, source_line);
 290     }
 291   else if (newptr != ptr)
 292     {
 293       unregister_ptr (ptr);
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   return newptr;
 297 }
 298
 299 char *
 300 xstrdup_debug (const char *s, const char *source_file, int source_line)
 301 {
 302   char *copy = xstrdup_real (s);
 303   ++malloc_count;
 304   register_ptr (copy, source_file, source_line);
 305   return copy;
 306 }
 307
 308 #endif /* DEBUG_MALLOC */
 309 \f
 310 /* Utility function: like xstrdup(), but also lowercases S.  */
 311
 312 char *
 313 xstrdup_lower (const char *s)
 314 {
 315   char *copy = xstrdup (s);
 316   char *p = copy;
 317   for (; *p; p++)
 318     *p = TOLOWER (*p);
 319   return copy;
 320 }
 321
 322 /* Return a count of how many times CHR occurs in STRING. */
 323
 324 int
 325 count_char (const char *string, char chr)
 326 {
 327   const char *p;
 328   int count = 0;
 329   for (p = string; *p; p++)
 330     if (*p == chr)
 331       ++count;
 332   return count;
 333 }
 334
 335 /* Copy the string formed by two pointers (one on the beginning, other
 336    on the char after the last char) to a new, malloc-ed location.
 337    0-terminate it.  */
 338 char *
 339 strdupdelim (const char *beg, const char *end)
 340 {
 341   char *res = (char *)xmalloc (end - beg + 1);
 342   memcpy (res, beg, end - beg);
 343   res[end - beg] = '\0';
 344   return res;
 345 }
 346
 347 /* Parse a string containing comma-separated elements, and return a
 348    vector of char pointers with the elements.  Spaces following the
 349    commas are ignored.  */
 350 char **
 351 sepstring (const char *s)
 352 {
 353   char **res;
 354   const char *p;
 355   int i = 0;
 356
 357   if (!s || !*s)
 358     return NULL;
 359   res = NULL;
 360   p = s;
 361   while (*s)
 362     {
 363       if (*s == ',')
 364         {
 365           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 366           res[i] = strdupdelim (p, s);
 367           res[++i] = NULL;
 368           ++s;
 369           /* Skip the blanks following the ','.  */
 370           while (ISSPACE (*s))
 371             ++s;
 372           p = s;
 373         }
 374       else
 375         ++s;
 376     }
 377   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 378   res[i] = strdupdelim (p, s);
 379   res[i + 1] = NULL;
 380   return res;
 381 }
 382 \f
 383 /* Return pointer to a static char[] buffer in which zero-terminated
 384    string-representation of TM (in form hh:mm:ss) is printed.
 385
 386    If TM is non-NULL, the current time-in-seconds will be stored
 387    there.
 388
 389    (#### This is misleading: one would expect TM would be used instead
 390    of the current time in that case.  This design was probably
 391    influenced by the design time(2), and should be changed at some
 392    points.  No callers use non-NULL TM anyway.)  */
 393
 394 char *
 395 time_str (time_t *tm)
 396 {
 397   static char output[15];
 398   struct tm *ptm;
 399   time_t secs = time (tm);
 400
 401   if (secs == -1)
 402     {
 403       /* In case of error, return the empty string.  Maybe we should
 404          just abort if this happens?  */
 405       *output = '\0';
 406       return output;
 407     }
 408   ptm = localtime (&secs);
 409   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 410   return output;
 411 }
 412
 413 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 414
 415 char *
 416 datetime_str (time_t *tm)
 417 {
 418   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 419   struct tm *ptm;
 420   time_t secs = time (tm);
 421
 422   if (secs == -1)
 423     {
 424       /* In case of error, return the empty string.  Maybe we should
 425          just abort if this happens?  */
 426       *output = '\0';
 427       return output;
 428     }
 429   ptm = localtime (&secs);
 430   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 431            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 432            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 433   return output;
 434 }
 435 \f
 436 /* The Windows versions of the following two functions are defined in
 437    mswindows.c.  */
 438
 439 #ifndef WINDOWS
 440 void
 441 fork_to_background (void)
 442 {
 443   pid_t pid;
 444   /* Whether we arrange our own version of opt.lfilename here.  */
 445   int changedp = 0;
 446
 447   if (!opt.lfilename)
 448     {
 449       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 450       changedp = 1;
 451     }
 452   pid = fork ();
 453   if (pid < 0)
 454     {
 455       /* parent, error */
 456       perror ("fork");
 457       exit (1);
 458     }
 459   else if (pid != 0)
 460     {
 461       /* parent, no error */
 462       printf (_("Continuing in background.\n"));
 463       if (changedp)
 464         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 465       exit (0);
 466     }
 467   /* child: keep running */
 468 }
 469 #endif /* not WINDOWS */
 470 \f
 471 #if 0
 472 /* debug */
 473 char *
 474 ps (char *orig)
 475 {
 476   char *r = xstrdup (orig);
 477   path_simplify (r);
 478   return r;
 479 }
 480 #endif
 481
 482 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 483    in that:
 484         Multple `/'s are collapsed to a single `/'.
 485         Leading `./'s and trailing `/.'s are removed.
 486         Trailing `/'s are removed.
 487         Non-leading `../'s and trailing `..'s are handled by removing
 488         portions of the path.
 489
 490    E.g. "a/b/c/./../d/.." will yield "a/b/".  This function originates
 491    from GNU Bash and has been mutilated to unrecognition for use in
 492    Wget.
 493
 494    Changes for Wget:
 495         Always use '/' as stub_char.
 496         Don't check for local things using canon_stat.
 497         Change the original string instead of strdup-ing.
 498         React correctly when beginning with `./' and `../'.
 499         Don't zip out trailing slashes.
 500         Return a value indicating whether any modifications took place.
 501
 502    If you dare change this function, take a careful look at the test
 503    cases below, and make sure that they pass.  */
 504
 505 int
 506 path_simplify (char *path)
 507 {
 508   register int i, start;
 509   int changes = 0;
 510   char stub_char;
 511
 512   if (!*path)
 513     return 0;
 514
 515   stub_char = '/';
 516
 517   if (path[0] == '/')
 518     /* Preserve initial '/'. */
 519     ++path;
 520
 521   /* Nix out leading `.' or `..' with.  */
 522   if ((path[0] == '.' && path[1] == '\0')
 523       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 524     {
 525       path[0] = '\0';
 526       changes = 1;
 527       return changes;
 528     }
 529
 530   /* Walk along PATH looking for things to compact.  */
 531   i = 0;
 532   while (1)
 533     {
 534       if (!path[i])
 535         break;
 536
 537       while (path[i] && path[i] != '/')
 538         i++;
 539
 540       start = i++;
 541
 542       /* If we didn't find any slashes, then there is nothing left to do.  */
 543       if (!path[start])
 544         break;
 545
 546       /* Handle multiple `/'s in a row.  */
 547       while (path[i] == '/')
 548         i++;
 549
 550       if ((start + 1) != i)
 551         {
 552           strcpy (path + start + 1, path + i);
 553           i = start + 1;
 554           changes = 1;
 555         }
 556
 557       /* Check for `../', `./' or trailing `.' by itself.  */
 558       if (path[i] == '.')
 559         {
 560           /* Handle trailing `.' by itself.  */
 561           if (!path[i + 1])
 562             {
 563               path[--i] = '\0';
 564               changes = 1;
 565               break;
 566             }
 567
 568           /* Handle `./'.  */
 569           if (path[i + 1] == '/')
 570             {
 571               strcpy (path + i, path + i + 1);
 572               i = (start < 0) ? 0 : start;
 573               changes = 1;
 574               continue;
 575             }
 576
 577           /* Handle `../' or trailing `..' by itself.  */
 578           if (path[i + 1] == '.' &&
 579               (path[i + 2] == '/' || !path[i + 2]))
 580             {
 581               while (--start > -1 && path[start] != '/');
 582               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 583               i = (start < 0) ? 0 : start;
 584               changes = 1;
 585               continue;
 586             }
 587         }       /* path == '.' */
 588     } /* while */
 589
 590   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 591   i = 0;
 592   while (1)
 593     {
 594       if (path[i] == '.' && path[i + 1] == '/')
 595         i += 2;
 596       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 597         i += 3;
 598       else
 599         break;
 600     }
 601   if (i)
 602     {
 603       strcpy (path, path + i - 0);
 604       changes = 1;
 605     }
 606
 607   return changes;
 608 }
 609
 610 /* Test cases:
 611    ps("")                   -> ""
 612    ps("/")                  -> "/"
 613    ps(".")                  -> ""
 614    ps("..")                 -> ""
 615    ps("/.")                 -> "/"
 616    ps("/..")                -> "/"
 617    ps("foo")                -> "foo"
 618    ps("foo/bar")            -> "foo/bar"
 619    ps("foo//bar")           -> "foo/bar"             (possibly a bug)
 620    ps("foo/../bar")         -> "bar"
 621    ps("foo/bar/..")         -> "foo/"
 622    ps("foo/bar/../x")       -> "foo/x"
 623    ps("foo/bar/../x/")      -> "foo/x/"
 624    ps("foo/..")             -> ""
 625    ps("/foo/..")            -> "/"
 626    ps("a/b/../../c")        -> "c"
 627    ps("/a/b/../../c")       -> "/c"
 628    ps("./a/../b")           -> "b"
 629    ps("/./a/../b")          -> "/b"
 630 */
 631 \f
 632 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 633    specified with TM.  */
 634 void
 635 touch (const char *file, time_t tm)
 636 {
 637 #ifdef HAVE_STRUCT_UTIMBUF
 638   struct utimbuf times;
 639   times.actime = times.modtime = tm;
 640 #else
 641   time_t times[2];
 642   times[0] = times[1] = tm;
 643 #endif
 644
 645   if (utime (file, &times) == -1)
 646     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 647 }
 648
 649 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 650    nothing under MS-Windows.  */
 651 int
 652 remove_link (const char *file)
 653 {
 654   int err = 0;
 655   struct stat st;
 656
 657   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 658     {
 659       DEBUGP (("Unlinking %s (symlink).\n", file));
 660       err = unlink (file);
 661       if (err != 0)
 662         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 663                    file, strerror (errno));
 664     }
 665   return err;
 666 }
 667
 668 /* Does FILENAME exist?  This is quite a lousy implementation, since
 669    it supplies no error codes -- only a yes-or-no answer.  Thus it
 670    will return that a file does not exist if, e.g., the directory is
 671    unreadable.  I don't mind it too much currently, though.  The
 672    proper way should, of course, be to have a third, error state,
 673    other than true/false, but that would introduce uncalled-for
 674    additional complexity to the callers.  */
 675 int
 676 file_exists_p (const char *filename)
 677 {
 678 #ifdef HAVE_ACCESS
 679   return access (filename, F_OK) >= 0;
 680 #else
 681   struct stat buf;
 682   return stat (filename, &buf) >= 0;
 683 #endif
 684 }
 685
 686 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 687    Returns 0 on error.  */
 688 int
 689 file_non_directory_p (const char *path)
 690 {
 691   struct stat buf;
 692   /* Use lstat() rather than stat() so that symbolic links pointing to
 693      directories can be identified correctly.  */
 694   if (lstat (path, &buf) != 0)
 695     return 0;
 696   return S_ISDIR (buf.st_mode) ? 0 : 1;
 697 }
 698
 699 /* Return a unique filename, given a prefix and count */
 700 static char *
 701 unique_name_1 (const char *fileprefix, int count)
 702 {
 703   char *filename;
 704
 705   if (count)
 706     {
 707       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 708       sprintf (filename, "%s.%d", fileprefix, count);
 709     }
 710   else
 711     filename = xstrdup (fileprefix);
 712
 713   if (!file_exists_p (filename))
 714     return filename;
 715   else
 716     {
 717       xfree (filename);
 718       return NULL;
 719     }
 720 }
 721
 722 /* Return a unique file name, based on PREFIX.  */
 723 char *
 724 unique_name (const char *prefix)
 725 {
 726   char *file = NULL;
 727   int count = 0;
 728
 729   while (!file)
 730     file = unique_name_1 (prefix, count++);
 731   return file;
 732 }
 733 \f
 734 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 735    are missing, create them first.  In case any mkdir() call fails,
 736    return its error status.  Returns 0 on successful completion.
 737
 738    The behaviour of this function should be identical to the behaviour
 739    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 740 int
 741 make_directory (const char *directory)
 742 {
 743   int quit = 0;
 744   int i;
 745   char *dir;
 746
 747   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 748      function is unsafe if called with a read-only char *argument.  */
 749   STRDUP_ALLOCA (dir, directory);
 750
 751   /* If the first character of dir is '/', skip it (and thus enable
 752      creation of absolute-pathname directories.  */
 753   for (i = (*dir == '/'); 1; ++i)
 754     {
 755       for (; dir[i] && dir[i] != '/'; i++)
 756         ;
 757       if (!dir[i])
 758         quit = 1;
 759       dir[i] = '\0';
 760       /* Check whether the directory already exists.  */
 761       if (!file_exists_p (dir))
 762         {
 763           if (mkdir (dir, 0777) < 0)
 764             return -1;
 765         }
 766       if (quit)
 767         break;
 768       else
 769         dir[i] = '/';
 770     }
 771   return 0;
 772 }
 773
 774 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 775    should be a file name.
 776
 777    file_merge("/foo/bar", "baz")  => "/foo/baz"
 778    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 779    file_merge("foo", "bar")       => "bar"
 780
 781    In other words, it's a simpler and gentler version of uri_merge_1.  */
 782
 783 char *
 784 file_merge (const char *base, const char *file)
 785 {
 786   char *result;
 787   const char *cut = (const char *)strrchr (base, '/');
 788
 789   if (!cut)
 790     return xstrdup (file);
 791
 792   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 793   memcpy (result, base, cut - base);
 794   result[cut - base] = '/';
 795   strcpy (result + (cut - base) + 1, file);
 796
 797   return result;
 798 }
 799 \f
 800 static int in_acclist PARAMS ((const char *const *, const char *, int));
 801
 802 /* Determine whether a file is acceptable to be followed, according to
 803    lists of patterns to accept/reject.  */
 804 int
 805 acceptable (const char *s)
 806 {
 807   int l = strlen (s);
 808
 809   while (l && s[l] != '/')
 810     --l;
 811   if (s[l] == '/')
 812     s += (l + 1);
 813   if (opt.accepts)
 814     {
 815       if (opt.rejects)
 816         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 817                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 818       else
 819         return in_acclist ((const char *const *)opt.accepts, s, 1);
 820     }
 821   else if (opt.rejects)
 822     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 823   return 1;
 824 }
 825
 826 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 827    `/something', frontcmp() will return 1 only if S2 begins with
 828    `/something'.  Otherwise, 0 is returned.  */
 829 int
 830 frontcmp (const char *s1, const char *s2)
 831 {
 832   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 833   return !*s1;
 834 }
 835
 836 /* Iterate through STRLIST, and return the first element that matches
 837    S, through wildcards or front comparison (as appropriate).  */
 838 static char *
 839 proclist (char **strlist, const char *s, enum accd flags)
 840 {
 841   char **x;
 842
 843   for (x = strlist; *x; x++)
 844     if (has_wildcards_p (*x))
 845       {
 846         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 847           break;
 848       }
 849     else
 850       {
 851         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 852         if (frontcmp (p, s))
 853           break;
 854       }
 855   return *x;
 856 }
 857
 858 /* Returns whether DIRECTORY is acceptable for download, wrt the
 859    include/exclude lists.
 860
 861    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 862    and absolute paths may be freely intermixed.  */
 863 int
 864 accdir (const char *directory, enum accd flags)
 865 {
 866   /* Remove starting '/'.  */
 867   if (flags & ALLABS && *directory == '/')
 868     ++directory;
 869   if (opt.includes)
 870     {
 871       if (!proclist (opt.includes, directory, flags))
 872         return 0;
 873     }
 874   if (opt.excludes)
 875     {
 876       if (proclist (opt.excludes, directory, flags))
 877         return 0;
 878     }
 879   return 1;
 880 }
 881
 882 /* Match the end of STRING against PATTERN.  For instance:
 883
 884    match_backwards ("abc", "bc") -> 1
 885    match_backwards ("abc", "ab") -> 0
 886    match_backwards ("abc", "abc") -> 1 */
 887 int
 888 match_tail (const char *string, const char *pattern)
 889 {
 890   int i, j;
 891
 892   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 893     if (string[i] != pattern[j])
 894       break;
 895   /* If the pattern was exhausted, the match was succesful.  */
 896   if (j == -1)
 897     return 1;
 898   else
 899     return 0;
 900 }
 901
 902 /* Checks whether string S matches each element of ACCEPTS.  A list
 903    element are matched either with fnmatch() or match_tail(),
 904    according to whether the element contains wildcards or not.
 905
 906    If the BACKWARD is 0, don't do backward comparison -- just compare
 907    them normally.  */
 908 static int
 909 in_acclist (const char *const *accepts, const char *s, int backward)
 910 {
 911   for (; *accepts; accepts++)
 912     {
 913       if (has_wildcards_p (*accepts))
 914         {
 915           /* fnmatch returns 0 if the pattern *does* match the
 916              string.  */
 917           if (fnmatch (*accepts, s, 0) == 0)
 918             return 1;
 919         }
 920       else
 921         {
 922           if (backward)
 923             {
 924               if (match_tail (s, *accepts))
 925                 return 1;
 926             }
 927           else
 928             {
 929               if (!strcmp (s, *accepts))
 930                 return 1;
 931             }
 932         }
 933     }
 934   return 0;
 935 }
 936
 937 /* Return the location of STR's suffix (file extension).  Examples:
 938    suffix ("foo.bar")       -> "bar"
 939    suffix ("foo.bar.baz")   -> "baz"
 940    suffix ("/foo/bar")      -> NULL
 941    suffix ("/foo.bar/baz")  -> NULL  */
 942 char *
 943 suffix (const char *str)
 944 {
 945   int i;
 946
 947   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 948     ;
 949
 950   if (str[i++] == '.')
 951     return (char *)str + i;
 952   else
 953     return NULL;
 954 }
 955
 956 /* Read a line from FP.  The function reallocs the storage as needed
 957    to accomodate for any length of the line.  Reallocs are done
 958    storage exponentially, doubling the storage after each overflow to
 959    minimize the number of calls to realloc() and fgets().  The newline
 960    character at the end of line is retained.
 961
 962    After end-of-file is encountered without anything being read, NULL
 963    is returned.  NULL is also returned on error.  To distinguish
 964    between these two cases, use the stdio function ferror().  */
 965
 966 char *
 967 read_whole_line (FILE *fp)
 968 {
 969   int length = 0;
 970   int bufsize = 81;
 971   char *line = (char *)xmalloc (bufsize);
 972
 973   while (fgets (line + length, bufsize - length, fp))
 974     {
 975       length += strlen (line + length);
 976       assert (length > 0);
 977       if (line[length - 1] == '\n')
 978         break;
 979       /* fgets() guarantees to read the whole line, or to use up the
 980          space we've given it.  We can double the buffer
 981          unconditionally.  */
 982       bufsize <<= 1;
 983       line = xrealloc (line, bufsize);
 984     }
 985   if (length == 0 || ferror (fp))
 986     {
 987       xfree (line);
 988       return NULL;
 989     }
 990   if (length + 1 < bufsize)
 991     /* Relieve the memory from our exponential greediness.  We say
 992        `length + 1' because the terminating \0 is not included in
 993        LENGTH.  We don't need to zero-terminate the string ourselves,
 994        though, because fgets() does that.  */
 995     line = xrealloc (line, length + 1);
 996   return line;
 997 }
 998 \f
 999 /* Read FILE into memory.  A pointer to `struct file_memory' is
1000    returned; use struct element `content' to access file contents, and
1001    the element `length' to know the file length.  `content' is *not*
1002    zero-terminated, and you should *not* read or write beyond the [0,
1003    length) range of characters.
1004
1005    After you are done with the file contents, call read_file_free to
1006    release the memory.
1007
1008    Depending on the operating system and the type of file that is
1009    being read, read_file() either mmap's the file into memory, or
1010    reads the file into the core using read().
1011
1012    If file is named "-", fileno(stdin) is used for reading instead.
1013    If you want to read from a real file named "-", use "./-" instead.  */
1014
1015 struct file_memory *
1016 read_file (const char *file)
1017 {
1018   int fd;
1019   struct file_memory *fm;
1020   long size;
1021   int inhibit_close = 0;
1022
1023   /* Some magic in the finest tradition of Perl and its kin: if FILE
1024      is "-", just use stdin.  */
1025   if (HYPHENP (file))
1026     {
1027       fd = fileno (stdin);
1028       inhibit_close = 1;
1029       /* Note that we don't inhibit mmap() in this case.  If stdin is
1030          redirected from a regular file, mmap() will still work.  */
1031     }
1032   else
1033     fd = open (file, O_RDONLY);
1034   if (fd < 0)
1035     return NULL;
1036   fm = xmalloc (sizeof (struct file_memory));
1037
1038 #ifdef HAVE_MMAP
1039   {
1040     struct stat buf;
1041     if (fstat (fd, &buf) < 0)
1042       goto mmap_lose;
1043     fm->length = buf.st_size;
1044     /* NOTE: As far as I know, the callers of this function never
1045        modify the file text.  Relying on this would enable us to
1046        specify PROT_READ and MAP_SHARED for a marginal gain in
1047        efficiency, but at some cost to generality.  */
1048     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1049                         MAP_PRIVATE, fd, 0);
1050     if (fm->content == (char *)MAP_FAILED)
1051       goto mmap_lose;
1052     if (!inhibit_close)
1053       close (fd);
1054
1055     fm->mmap_p = 1;
1056     return fm;
1057   }
1058
1059  mmap_lose:
1060   /* The most common reason why mmap() fails is that FD does not point
1061      to a plain file.  However, it's also possible that mmap() doesn't
1062      work for a particular type of file.  Therefore, whenever mmap()
1063      fails, we just fall back to the regular method.  */
1064 #endif /* HAVE_MMAP */
1065
1066   fm->length = 0;
1067   size = 512;                   /* number of bytes fm->contents can
1068                                    hold at any given time. */
1069   fm->content = xmalloc (size);
1070   while (1)
1071     {
1072       long nread;
1073       if (fm->length > size / 2)
1074         {
1075           /* #### I'm not sure whether the whole exponential-growth
1076              thing makes sense with kernel read.  On Linux at least,
1077              read() refuses to read more than 4K from a file at a
1078              single chunk anyway.  But other Unixes might optimize it
1079              better, and it doesn't *hurt* anything, so I'm leaving
1080              it.  */
1081
1082           /* Normally, we grow SIZE exponentially to make the number
1083              of calls to read() and realloc() logarithmic in relation
1084              to file size.  However, read() can read an amount of data
1085              smaller than requested, and it would be unreasonably to
1086              double SIZE every time *something* was read.  Therefore,
1087              we double SIZE only when the length exceeds half of the
1088              entire allocated size.  */
1089           size <<= 1;
1090           fm->content = xrealloc (fm->content, size);
1091         }
1092       nread = read (fd, fm->content + fm->length, size - fm->length);
1093       if (nread > 0)
1094         /* Successful read. */
1095         fm->length += nread;
1096       else if (nread < 0)
1097         /* Error. */
1098         goto lose;
1099       else
1100         /* EOF */
1101         break;
1102     }
1103   if (!inhibit_close)
1104     close (fd);
1105   if (size > fm->length && fm->length != 0)
1106     /* Due to exponential growth of fm->content, the allocated region
1107        might be much larger than what is actually needed.  */
1108     fm->content = xrealloc (fm->content, fm->length);
1109   fm->mmap_p = 0;
1110   return fm;
1111
1112  lose:
1113   if (!inhibit_close)
1114     close (fd);
1115   xfree (fm->content);
1116   xfree (fm);
1117   return NULL;
1118 }
1119
1120 /* Release the resources held by FM.  Specifically, this calls
1121    munmap() or xfree() on fm->content, depending whether mmap or
1122    malloc/read were used to read in the file.  It also frees the
1123    memory needed to hold the FM structure itself.  */
1124
1125 void
1126 read_file_free (struct file_memory *fm)
1127 {
1128 #ifdef HAVE_MMAP
1129   if (fm->mmap_p)
1130     {
1131       munmap (fm->content, fm->length);
1132     }
1133   else
1134 #endif
1135     {
1136       xfree (fm->content);
1137     }
1138   xfree (fm);
1139 }
1140 \f
/* Free every string in the NULL-terminated vector VEC, then free the
   vector itself.  A NULL VEC is silently ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;
  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1154
/* Append vector V2 to vector V1 and return the result.  Both vectors
   are NULL-terminated.  The function frees V2 and reallocates V1, so
   neither pointer may be used after the call; use the returned vector
   instead.  If V1 is NULL, V2 is returned; if V2 is NULL, V1 is
   returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to copy, so avoid the j == 0 case.  */
      xfree (v2);
      return v1;
    }
  /* Count the elements of v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count the elements of v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Make room in v1 for both sets of elements plus the terminating
     NULL.  The elements are `char *', so size by the element type.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy v2's elements together with its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1183
1184 /* A set of simple-minded routines to store strings in a linked list.
1185    This used to also be used for searching, but now we have hash
1186    tables for that.  */
1187
1188 /* It's a shame that these simple things like linked lists and hash
1189    tables (see hash.c) need to be implemented over and over again.  It
1190    would be nice to be able to use the routines from glib -- see
1191    www.gtk.org for details.  However, that would make Wget depend on
1192    glib, and I want to avoid dependencies to external libraries for
1193    reasons of convenience and portability (I suspect Wget is more
1194    portable than anything ever written for Gnome).  */
1195
1196 /* Append an element to the list.  If the list has a huge number of
1197    elements, this can get slow because it has to find the list's
1198    ending.  If you think you have to call slist_append in a loop,
1199    think about calling slist_prepend() followed by slist_nreverse().  */
1200
1201 slist *
1202 slist_append (slist *l, const char *s)
1203 {
1204   slist *newel = (slist *)xmalloc (sizeof (slist));
1205   slist *beg = l;
1206
1207   newel->string = xstrdup (s);
1208   newel->next = NULL;
1209
1210   if (!l)
1211     return newel;
1212   /* Find the last element.  */
1213   while (l->next)
1214     l = l->next;
1215   l->next = newel;
1216   return beg;
1217 }
1218
1219 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1220
1221 slist *
1222 slist_prepend (slist *l, const char *s)
1223 {
1224   slist *newel = (slist *)xmalloc (sizeof (slist));
1225   newel->string = xstrdup (s);
1226   newel->next = l;
1227   return newel;
1228 }
1229
1230 /* Destructively reverse L. */
1231
1232 slist *
1233 slist_nreverse (slist *l)
1234 {
1235   slist *prev = NULL;
1236   while (l)
1237     {
1238       slist *next = l->next;
1239       l->next = prev;
1240       prev = l;
1241       l = next;
1242     }
1243   return prev;
1244 }
1245
1246 /* Is there a specific entry in the list?  */
1247 int
1248 slist_contains (slist *l, const char *s)
1249 {
1250   for (; l; l = l->next)
1251     if (!strcmp (l->string, s))
1252       return 1;
1253   return 0;
1254 }
1255
1256 /* Free the whole slist.  */
1257 void
1258 slist_free (slist *l)
1259 {
1260   while (l)
1261     {
1262       slist *n = l->next;
1263       xfree (l->string);
1264       xfree (l);
1265       l = n;
1266     }
1267 }
1268 \f
1269 /* Sometimes it's useful to create "sets" of strings, i.e. special
1270    hash tables where you want to store strings as keys and merely
1271    query for their existence.  Here is a set of utility routines that
1272    makes that transparent.  */
1273
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not already a member.  That way we never
     have to free() an old key and strdup() a replacement.

     The value stored is the string literal "1": it is a clear
     arbitrary marker and costs no memory, since the same literal is
     shared by all key-value pairs in all `set' hash tables.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1289
/* Report whether S is a member of the set HT.  This simply forwards
   to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}
1297
/* Mapper for string_set_free: free KEY.  The value and the extra
   argument are not used.  Always returns 0.  */

static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}

/* Free all the keys of the string set HT, then destroy the hash
   table itself.  */

void
string_set_free (struct hash_table *ht)
{
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1311
/* Mapper for free_keys_and_values: free both KEY and VALUE.  The
   extra argument is not used.  Always returns 0.  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}

/* Another utility function: call xfree() on every key and every value
   stored in HT.  The hash table itself is not destroyed here.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1327
1328 \f
/* Engine for legible and legible_very_long; this function works on
   strings.  REPR is the textual form of a number, optionally starting
   with `-'.  The result is the same text with a `,' inserted between
   groups of three characters counted from the right.  It is stored in
   a static buffer that the next call overwrites.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *digits = repr;
  size_t remaining;

  /* Carry a leading minus sign over untouched.  */
  if (*digits == '-')
    *dst++ = *digits++;

  /* Copy the digits one at a time.  A separator follows a digit
     whenever the number of digits still to come is a non-zero
     multiple of three.  */
  for (remaining = strlen (digits); remaining > 0; remaining--)
    {
      *dst++ = *digits++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *dst++ = ',';
    }

  /* Zero-terminate the string.  */
  *dst = '\0';
  return outbuf;
}
1366
/* Legible -- return the legibly printed form of the long L.  The
   result lives in legible_1's static buffer, so it is overwritten by
   the next call.  */
char *
legible (long l)
{
  char digits[24];

  /* Render the plain decimal form first, then add the separators.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1376
1377 /* Write a string representation of NUMBER into the provided buffer.
1378    We cannot use sprintf() because we cannot be sure whether the
1379    platform supports printing of what we chose for VERY_LONG_TYPE.
1380
1381    Example: Gcc supports `long long' under many platforms, but on many
1382    of those the native libc knows nothing of it and therefore cannot
1383    print it.
1384
1385    How long BUFFER needs to be depends on the platform and the content
1386    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1387    bytes are sufficient.  Using more might be a good idea.
1388
1389    This function does not go through the hoops that long_to_string
1390    goes to because it doesn't aspire to be fast.  (It's called perhaps
1391    once in a Wget run.)  */
1392
1393 static void
1394 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1395 {
1396   int i = 0;
1397   int j;
1398
1399   /* Print the number backwards... */
1400   do
1401     {
1402       buffer[i++] = '0' + number % 10;
1403       number /= 10;
1404     }
1405   while (number);
1406
1407   /* ...and reverse the order of the digits. */
1408   for (j = 0; j < i / 2; j++)
1409     {
1410       char c = buffer[j];
1411       buffer[j] = buffer[i - 1 - j];
1412       buffer[i - 1 - j] = c;
1413     }
1414   buffer[i] = '\0';
1415 }
1416
1417 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1418 char *
1419 legible_very_long (VERY_LONG_TYPE l)
1420 {
1421   char inbuf[128];
1422   /* Print the number into the buffer.  */
1423   very_long_to_string (inbuf, l);
1424   return legible_1 (inbuf);
1425 }
1426
/* Count the characters needed to print the (long) integer A in base
   10: one per digit, plus one for the `-' sign when A is negative.  */
int
numdigit (long a)
{
  unsigned long magnitude;
  int res = 1;

  if (a < 0)
    {
      /* Take the absolute value in unsigned arithmetic.  Negating A
         itself would overflow when A is LONG_MIN.  */
      magnitude = 0UL - (unsigned long) a;
      ++res;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1441
/* Helper macros for long_to_string.  They operate on its local
   variables: `n' holds the digits still to be printed and `p' points
   to where the next character goes.  FIGURE is the power of ten that
   corresponds to the most significant digit remaining.

   ONE_DIGIT stores the leading digit; ONE_DIGIT_ADVANCE also strips
   that digit from `n'.  DIGITS_<k> prints exactly K digits.  */

#define ONE_DIGIT(figure) *p++ = (char) (n / (figure) + '0')
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is equivalent to
   `sprintf(buffer, "%ld", number)' for every value of NUMBER,
   including LONG_MIN, only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs, 24 bytes are enough for the
   worst-case digits, the optional `-' sign, and the trailing \0.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Take the absolute value in unsigned arithmetic.  Negating
         NUMBER itself would overflow when it is LONG_MIN.  */
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

/* The helper macros are private to long_to_string.  */

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1550 \f
1551 /* Support for timers. */
1552
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Exactly one of the three constants above gets defined below, and it
   selects the timer implementation used by the wtimer_* functions:

     TIMER_WINDOWS       when WINDOWS is defined;
     TIMER_GETTIMEOFDAY  otherwise, when gettimeofday() is available
                         (virtually all modern Unix systems);
     TIMER_TIME          the catch-all for non-Windows systems without
                         gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else
# define TIMER_TIME
#endif
1576
/* The moment at which a timer was last reset, stored in whatever form
   the selected timer implementation provides.  */
struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  /* The tv_sec and tv_usec values reported by gettimeofday().  */
  long secs;
  long usecs;
#endif

#if defined (TIMER_TIME)
  /* The value returned by time().  */
  time_t secs;
#endif

#if defined (TIMER_WINDOWS)
  /* The system time converted to a FILETIME, as a 64-bit quantity.  */
  ULARGE_INTEGER wintime;
#endif
};
1591
1592 /* Allocate a timer.  It is not legal to do anything with a freshly
1593    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1594
1595 struct wget_timer *
1596 wtimer_allocate (void)
1597 {
1598   struct wget_timer *wt =
1599     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1600   return wt;
1601 }
1602
/* Allocate a new timer, reset it so that it starts counting from now,
   and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer;

  timer = wtimer_allocate ();
  wtimer_reset (timer);
  return timer;
}
1612
/* Release the memory held by timer WT.  The timer must not be used
   after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1621
/* Reset timer WT by recording the current time in it.  This
   establishes the starting point from which wtimer_elapsed() will
   count the elapsed milliseconds.  It is allowed to reset a
   previously used timer.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;
  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#endif

#if defined (TIMER_TIME)
  wt->secs = time (NULL);
#endif

#if defined (TIMER_WINDOWS)
  /* Fetch the system time and convert it to a FILETIME, whose two
     halves are stored as one 64-bit quantity.  */
  SYSTEMTIME systime;
  FILETIME filetime;
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1649
/* Return the number of milliseconds elapsed since the timer WT was
   last reset.  It is allowed to call this function more than once to
   get increasingly higher elapsed values.

   With TIMER_TIME the result is always a multiple of 1000, because
   time() only provides whole seconds.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* The microsecond difference may be negative; adding it to the
     whole-second difference still yields the right total.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* The difference is divided by 10000 to obtain milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1679
/* Return the assessed granularity, in milliseconds, of the timer
   implementation in use.  This is important for certain code that
   tries to deal with "zero" time intervals.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* Granularity of gettimeofday is hugely architecture-dependent.
     However, it appears that on modern machines it is better than
     1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1704 \f
1705 /* This should probably be at a better place, but it doesn't really
1706    fit into html-parse.c.  */
1707
/* Return the replacement text for the character C, or NULL when C is
   to be copied verbatim.  */
static const char *
html_quote_entity (char c)
{
  switch (c)
    {
    case '&':
      return "&amp;";
    case '<':
      return "&lt;";
    case '>':
      return "&gt;";
    case '\"':
      return "&quot;";
    case ' ':
      return "&#32;";
    default:
      return NULL;
    }
}

/* Return a malloc-ed copy of the string S in which the special
   characters are replaced with numeric and special graphic entities,
   as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   No other entities are recognized or replaced.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *res, *dst;
  int needed = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    {
      const char *entity = html_quote_entity (*src);
      needed += entity ? (int) strlen (entity) : 1;
    }

  res = (char *)xmalloc (needed + 1);

  /* Second pass: write out the characters and their replacements.  */
  dst = res;
  for (src = s; *src; src++)
    {
      const char *entity = html_quote_entity (*src);
      if (entity)
        {
          size_t len = strlen (entity);
          memcpy (dst, entity, len);
          dst += len;
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';
  return res;
}
1779
1780 /* Determine the width of the terminal we're running on.  If that's
1781    not possible, return 0.  */
1782
1783 int
1784 determine_screen_width (void)
1785 {
1786   /* If there's a way to get the terminal size using POSIX
1787      tcgetattr(), somebody please tell me.  */
1788 #ifndef TIOCGWINSZ
1789   return 0;
1790 #else  /* TIOCGWINSZ */
1791   int fd;
1792   struct winsize wsz;
1793
1794   if (opt.lfilename != NULL)
1795     return 0;
1796
1797   fd = fileno (stderr);
1798   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1799     return 0;                   /* most likely ENOTTY */
1800
1801   return wsz.ws_col;
1802 #endif /* TIOCGWINSZ */
1803 }
1804
#if 0
/* A debugging function for checking whether an MD5 library works.
   It computes the MD5 digest of the string BUF and returns it as 32
   hexadecimal characters in a static buffer.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];
  static char res[33];
  char *out = res;
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Each digest byte becomes two characters, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      *out++ = XDIGIT_TO_xchar (raw[i] >> 4);
      *out++ = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  *out = '\0';

  return res;
}
#endif