sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 #include "wget.h"
  63 #include "utils.h"
  64 #include "fnmatch.h"
  65 #include "hash.h"
  66
  67 #ifndef errno
  68 extern int errno;
  69 #endif
  70
  71 /* This section implements several wrappers around the basic
  72    allocation routines.  This is done for two reasons: first, so that
  73    the callers of these functions need not consistently check for
  74    errors.  If there is not enough virtual memory for running Wget,
  75    something is seriously wrong, and Wget exits with an appropriate
  76    error message.
  77
  78    The second reason why these are useful is that, if DEBUG_MALLOC is
  79    defined, they also provide a handy (if crude) malloc debugging
  80    interface that checks memory leaks.  */
  81
  82 /* Croak the fatal memory error and bail out with non-zero exit
  83    status.  */
  84 static void
  85 memfatal (const char *what)
  86 {
  87   /* Make sure we don't try to store part of the log line, and thus
  88      call malloc.  */
  89   log_set_save_context (0);
  90   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  91   exit (1);
  92 }
  93
  94 /* These functions end with _real because they need to be
  95    distinguished from the debugging functions, and from the macros.
  96    Explanation follows:
  97
  98    If memory debugging is not turned on, wget.h defines these:
  99
 100      #define xmalloc xmalloc_real
 101      #define xrealloc xrealloc_real
 102      #define xstrdup xstrdup_real
 103      #define xfree free
 104
 105    In case of memory debugging, the definitions are a bit more
 106    complex, because we want to provide more information, *and* we want
 107    to call the debugging code.  (The former is the reason why xmalloc
 108    and friends need to be macros in the first place.)  Then it looks
 109    like this:
 110
 111      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 112      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 113      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 114      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 115
 116    Each of the *_debug function does its magic and calls the real one.  */
 117
 118 #ifdef DEBUG_MALLOC
 119 # define STATIC_IF_DEBUG static
 120 #else
 121 # define STATIC_IF_DEBUG
 122 #endif
 123
 124 STATIC_IF_DEBUG void *
 125 xmalloc_real (size_t size)
 126 {
 127   void *ptr = malloc (size);
 128   if (!ptr)
 129     memfatal ("malloc");
 130   return ptr;
 131 }
 132
 133 STATIC_IF_DEBUG void *
 134 xrealloc_real (void *ptr, size_t newsize)
 135 {
 136   void *newptr;
 137
 138   /* Not all Un*xes have the feature of realloc() that calling it with
 139      a NULL-pointer is the same as malloc(), but it is easy to
 140      simulate.  */
 141   if (ptr)
 142     newptr = realloc (ptr, newsize);
 143   else
 144     newptr = malloc (newsize);
 145   if (!newptr)
 146     memfatal ("realloc");
 147   return newptr;
 148 }
 149
 150 STATIC_IF_DEBUG char *
 151 xstrdup_real (const char *s)
 152 {
 153   char *copy;
 154
 155 #ifndef HAVE_STRDUP
 156   int l = strlen (s);
 157   copy = malloc (l + 1);
 158   if (!copy)
 159     memfatal ("strdup");
 160   memcpy (copy, s, l + 1);
 161 #else  /* HAVE_STRDUP */
 162   copy = strdup (s);
 163   if (!copy)
 164     memfatal ("strdup");
 165 #endif /* HAVE_STRDUP */
 166
 167   return copy;
 168 }
 169
 170 #ifdef DEBUG_MALLOC
 171
 172 /* Crude home-grown routines for debugging some malloc-related
 173    problems.  Featured:
 174
 175    * Counting the number of malloc and free invocations, and reporting
 176      the "balance", i.e. how many times more malloc was called than it
 177      was the case with free.
 178
 179    * Making malloc store its entry into a simple array and free remove
 180      stuff from that array.  At the end, print the pointers which have
 181      not been freed, along with the source file and the line number.
 182      This also has the side-effect of detecting freeing memory that
 183      was never allocated.
 184
 185    Note that this kind of memory leak checking strongly depends on
 186    every malloc() being followed by a free(), even if the program is
 187    about to finish.  Wget is careful to free the data structure it
 188    allocated in init.c.  */
 189
 190 static int malloc_count, free_count;
 191
 192 static struct {
 193   char *ptr;
 194   const char *file;
 195   int line;
 196 } malloc_debug[100000];
 197
 198 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 199    which can be a real problem.  It would be nice to use a hash table
 200    for malloc_debug, but the functions in hash.c are not suitable
 201    because they can call malloc() themselves.  Maybe it would work if
 202    the hash table were preallocated to a huge size, and if we set the
 203    rehash threshold to 1.0.  */
 204
 205 /* Register PTR in malloc_debug.  Abort if this is not possible
 206    (presumably due to the number of current allocations exceeding the
 207    size of malloc_debug.)  */
 208
 209 static void
 210 register_ptr (void *ptr, const char *file, int line)
 211 {
 212   int i;
 213   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 214     if (malloc_debug[i].ptr == NULL)
 215       {
 216         malloc_debug[i].ptr = ptr;
 217         malloc_debug[i].file = file;
 218         malloc_debug[i].line = line;
 219         return;
 220       }
 221   abort ();
 222 }
 223
 224 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 225    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 226
 227 static void
 228 unregister_ptr (void *ptr)
 229 {
 230   int i;
 231   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 232     if (malloc_debug[i].ptr == ptr)
 233       {
 234         malloc_debug[i].ptr = NULL;
 235         return;
 236       }
 237   abort ();
 238 }
 239
 240 /* Print the malloc debug stats that can be gathered from the above
 241    information.  Currently this is the count of mallocs, frees, the
 242    difference between the two, and the dump of the contents of
 243    malloc_debug.  The last part are the memory leaks.  */
 244
 245 void
 246 print_malloc_debug_stats (void)
 247 {
 248   int i;
 249   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 250           malloc_count, free_count, malloc_count - free_count);
 251   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 252     if (malloc_debug[i].ptr != NULL)
 253       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 254               malloc_debug[i].file, malloc_debug[i].line);
 255 }
 256
 257 void *
 258 xmalloc_debug (size_t size, const char *source_file, int source_line)
 259 {
 260   void *ptr = xmalloc_real (size);
 261   ++malloc_count;
 262   register_ptr (ptr, source_file, source_line);
 263   return ptr;
 264 }
 265
 266 void
 267 xfree_debug (void *ptr, const char *source_file, int source_line)
 268 {
 269   assert (ptr != NULL);
 270   ++free_count;
 271   unregister_ptr (ptr);
 272   free (ptr);
 273 }
 274
 275 void *
 276 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 277 {
 278   void *newptr = xrealloc_real (ptr, newsize);
 279   if (!ptr)
 280     {
 281       ++malloc_count;
 282       register_ptr (newptr, source_file, source_line);
 283     }
 284   else if (newptr != ptr)
 285     {
 286       unregister_ptr (ptr);
 287       register_ptr (newptr, source_file, source_line);
 288     }
 289   return newptr;
 290 }
 291
 292 char *
 293 xstrdup_debug (const char *s, const char *source_file, int source_line)
 294 {
 295   char *copy = xstrdup_real (s);
 296   ++malloc_count;
 297   register_ptr (copy, source_file, source_line);
 298   return copy;
 299 }
 300
 301 #endif /* DEBUG_MALLOC */
 302 \f
 303 /* Utility function: like xstrdup(), but also lowercases S.  */
 304
 305 char *
 306 xstrdup_lower (const char *s)
 307 {
 308   char *copy = xstrdup (s);
 309   char *p = copy;
 310   for (; *p; p++)
 311     *p = TOLOWER (*p);
 312   return copy;
 313 }
 314
 315 /* Return a count of how many times CHR occurs in STRING. */
 316
 317 int
 318 count_char (const char *string, char chr)
 319 {
 320   const char *p;
 321   int count = 0;
 322   for (p = string; *p; p++)
 323     if (*p == chr)
 324       ++count;
 325   return count;
 326 }
 327
 328 /* Copy the string formed by two pointers (one on the beginning, other
 329    on the char after the last char) to a new, malloc-ed location.
 330    0-terminate it.  */
 331 char *
 332 strdupdelim (const char *beg, const char *end)
 333 {
 334   char *res = (char *)xmalloc (end - beg + 1);
 335   memcpy (res, beg, end - beg);
 336   res[end - beg] = '\0';
 337   return res;
 338 }
 339
 340 /* Parse a string containing comma-separated elements, and return a
 341    vector of char pointers with the elements.  Spaces following the
 342    commas are ignored.  */
 343 char **
 344 sepstring (const char *s)
 345 {
 346   char **res;
 347   const char *p;
 348   int i = 0;
 349
 350   if (!s || !*s)
 351     return NULL;
 352   res = NULL;
 353   p = s;
 354   while (*s)
 355     {
 356       if (*s == ',')
 357         {
 358           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 359           res[i] = strdupdelim (p, s);
 360           res[++i] = NULL;
 361           ++s;
 362           /* Skip the blanks following the ','.  */
 363           while (ISSPACE (*s))
 364             ++s;
 365           p = s;
 366         }
 367       else
 368         ++s;
 369     }
 370   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 371   res[i] = strdupdelim (p, s);
 372   res[i + 1] = NULL;
 373   return res;
 374 }
 375 \f
 376 /* Return pointer to a static char[] buffer in which zero-terminated
 377    string-representation of TM (in form hh:mm:ss) is printed.
 378
 379    If TM is non-NULL, the current time-in-seconds will be stored
 380    there.
 381
 382    (#### This is misleading: one would expect TM would be used instead
 383    of the current time in that case.  This design was probably
 384    influenced by the design time(2), and should be changed at some
 385    points.  No callers use non-NULL TM anyway.)  */
 386
 387 char *
 388 time_str (time_t *tm)
 389 {
 390   static char output[15];
 391   struct tm *ptm;
 392   time_t secs = time (tm);
 393
 394   if (secs == -1)
 395     {
 396       /* In case of error, return the empty string.  Maybe we should
 397          just abort if this happens?  */
 398       *output = '\0';
 399       return output;
 400     }
 401   ptm = localtime (&secs);
 402   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 403   return output;
 404 }
 405
 406 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 407
 408 char *
 409 datetime_str (time_t *tm)
 410 {
 411   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 412   struct tm *ptm;
 413   time_t secs = time (tm);
 414
 415   if (secs == -1)
 416     {
 417       /* In case of error, return the empty string.  Maybe we should
 418          just abort if this happens?  */
 419       *output = '\0';
 420       return output;
 421     }
 422   ptm = localtime (&secs);
 423   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 424            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 425            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 426   return output;
 427 }
 428 \f
 429 /* The Windows versions of the following two functions are defined in
 430    mswindows.c.  */
 431
 432 #ifndef WINDOWS
 433 void
 434 fork_to_background (void)
 435 {
 436   pid_t pid;
 437   /* Whether we arrange our own version of opt.lfilename here.  */
 438   int changedp = 0;
 439
 440   if (!opt.lfilename)
 441     {
 442       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 443       changedp = 1;
 444     }
 445   pid = fork ();
 446   if (pid < 0)
 447     {
 448       /* parent, error */
 449       perror ("fork");
 450       exit (1);
 451     }
 452   else if (pid != 0)
 453     {
 454       /* parent, no error */
 455       printf (_("Continuing in background, pid %d.\n"), (int)pid);
 456       if (changedp)
 457         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 458       exit (0);                 /* #### should we use _exit()? */
 459     }
 460
 461   /* child: give up the privileges and keep running. */
 462   setsid ();
 463   freopen ("/dev/null", "r", stdin);
 464   freopen ("/dev/null", "w", stdout);
 465   freopen ("/dev/null", "w", stderr);
 466 }
 467 #endif /* not WINDOWS */
 468 \f
 469 /* Resolve "." and ".." elements of PATH by destructively modifying
 470    PATH.  "." is resolved by removing that path element, and ".." is
 471    resolved by removing the preceding path element.  Leading and
 472    trailing slashes are preserved.
 473
 474    Return non-zero if any changes have been made.
 475
 476    For example, "a/b/c/./../d/.." will yield "a/b/".  More exhaustive
 477    test examples are provided below.  If you change anything in this
 478    function, run test_path_simplify to make sure you haven't broken a
 479    test case.
 480
 481    A previous version of this function was based on path_simplify()
 482    from GNU Bash, but it has been rewritten for Wget 1.8.1.  */
 483
 484 int
 485 path_simplify (char *path)
 486 {
 487   int change = 0;
 488   char *p, *end;
 489
 490   if (path[0] == '/')
 491     ++path;                     /* preserve the leading '/'. */
 492
 493   p = path;
 494   end = p + strlen (p) + 1;     /* position past the terminating zero. */
 495
 496   while (1)
 497     {
 498     again:
 499       /* P should point to the beginning of a path element. */
 500
 501       if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0'))
 502         {
 503           /* Handle "./foo" by moving "foo" two characters to the
 504              left. */
 505           if (*(p + 1) == '/')
 506             {
 507               change = 1;
 508               memmove (p, p + 2, end - p);
 509               end -= 2;
 510               goto again;
 511             }
 512           else
 513             {
 514               change = 1;
 515               *p = '\0';
 516               break;
 517             }
 518         }
 519       else if (*p == '.' && *(p + 1) == '.'
 520                && (*(p + 2) == '/' || *(p + 2) == '\0'))
 521         {
 522           /* Handle "../foo" by moving "foo" one path element to the
 523              left.  */
 524           char *b = p;          /* not p-1 because P can equal PATH */
 525
 526           /* Backtrack by one path element, but not past the beginning
 527              of PATH. */
 528
 529           /* foo/bar/../baz */
 530           /*         ^ p    */
 531           /*     ^ b        */
 532
 533           if (b > path)
 534             {
 535               /* Move backwards until B hits the beginning of the
 536                  previous path element or the beginning of path. */
 537               for (--b; b > path && *(b - 1) != '/'; b--)
 538                 ;
 539             }
 540
 541           change = 1;
 542           if (*(p + 2) == '/')
 543             {
 544               memmove (b, p + 3, end - (p + 3));
 545               end -= (p + 3) - b;
 546               p = b;
 547             }
 548           else
 549             {
 550               *b = '\0';
 551               break;
 552             }
 553
 554           goto again;
 555         }
 556       else if (*p == '/')
 557         {
 558           /* Remove empty path elements.  Not mandated by rfc1808 et
 559              al, but empty path elements are not all that useful, and
 560              the rest of Wget might not deal with them well. */
 561           char *q = p;
 562           while (*q == '/')
 563             ++q;
 564           change = 1;
 565           if (*q == '\0')
 566             {
 567               *p = '\0';
 568               break;
 569             }
 570           memmove (p, q, end - q);
 571           end -= q - p;
 572           goto again;
 573         }
 574
 575       /* Skip to the next path element. */
 576       while (*p && *p != '/')
 577         ++p;
 578       if (*p == '\0')
 579         break;
 580
 581       /* Make sure P points to the beginning of the next path element,
 582          which is location after the slash. */
 583       ++p;
 584     }
 585
 586   return change;
 587 }
 588 \f
 589 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 590    specified with TM.  */
 591 void
 592 touch (const char *file, time_t tm)
 593 {
 594 #ifdef HAVE_STRUCT_UTIMBUF
 595   struct utimbuf times;
 596   times.actime = times.modtime = tm;
 597 #else
 598   time_t times[2];
 599   times[0] = times[1] = tm;
 600 #endif
 601
 602   if (utime (file, &times) == -1)
 603     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 604 }
 605
 606 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 607    nothing under MS-Windows.  */
 608 int
 609 remove_link (const char *file)
 610 {
 611   int err = 0;
 612   struct stat st;
 613
 614   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 615     {
 616       DEBUGP (("Unlinking %s (symlink).\n", file));
 617       err = unlink (file);
 618       if (err != 0)
 619         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 620                    file, strerror (errno));
 621     }
 622   return err;
 623 }
 624
 625 /* Does FILENAME exist?  This is quite a lousy implementation, since
 626    it supplies no error codes -- only a yes-or-no answer.  Thus it
 627    will return that a file does not exist if, e.g., the directory is
 628    unreadable.  I don't mind it too much currently, though.  The
 629    proper way should, of course, be to have a third, error state,
 630    other than true/false, but that would introduce uncalled-for
 631    additional complexity to the callers.  */
 632 int
 633 file_exists_p (const char *filename)
 634 {
 635 #ifdef HAVE_ACCESS
 636   return access (filename, F_OK) >= 0;
 637 #else
 638   struct stat buf;
 639   return stat (filename, &buf) >= 0;
 640 #endif
 641 }
 642
 643 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 644    Returns 0 on error.  */
 645 int
 646 file_non_directory_p (const char *path)
 647 {
 648   struct stat buf;
 649   /* Use lstat() rather than stat() so that symbolic links pointing to
 650      directories can be identified correctly.  */
 651   if (lstat (path, &buf) != 0)
 652     return 0;
 653   return S_ISDIR (buf.st_mode) ? 0 : 1;
 654 }
 655
 656 /* Return a unique filename, given a prefix and count */
 657 static char *
 658 unique_name_1 (const char *fileprefix, int count)
 659 {
 660   char *filename;
 661
 662   if (count)
 663     {
 664       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 665       sprintf (filename, "%s.%d", fileprefix, count);
 666     }
 667   else
 668     filename = xstrdup (fileprefix);
 669
 670   if (!file_exists_p (filename))
 671     return filename;
 672   else
 673     {
 674       xfree (filename);
 675       return NULL;
 676     }
 677 }
 678
 679 /* Return a unique file name, based on PREFIX.  */
 680 char *
 681 unique_name (const char *prefix)
 682 {
 683   char *file = NULL;
 684   int count = 0;
 685
 686   while (!file)
 687     file = unique_name_1 (prefix, count++);
 688   return file;
 689 }
 690 \f
 691 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 692    are missing, create them first.  In case any mkdir() call fails,
 693    return its error status.  Returns 0 on successful completion.
 694
 695    The behaviour of this function should be identical to the behaviour
 696    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 697 int
 698 make_directory (const char *directory)
 699 {
 700   int quit = 0;
 701   int i;
 702   char *dir;
 703
 704   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 705      function is unsafe if called with a read-only char *argument.  */
 706   STRDUP_ALLOCA (dir, directory);
 707
 708   /* If the first character of dir is '/', skip it (and thus enable
 709      creation of absolute-pathname directories.  */
 710   for (i = (*dir == '/'); 1; ++i)
 711     {
 712       for (; dir[i] && dir[i] != '/'; i++)
 713         ;
 714       if (!dir[i])
 715         quit = 1;
 716       dir[i] = '\0';
 717       /* Check whether the directory already exists.  */
 718       if (!file_exists_p (dir))
 719         {
 720           if (mkdir (dir, 0777) < 0)
 721             return -1;
 722         }
 723       if (quit)
 724         break;
 725       else
 726         dir[i] = '/';
 727     }
 728   return 0;
 729 }
 730
 731 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 732    should be a file name.
 733
 734    file_merge("/foo/bar", "baz")  => "/foo/baz"
 735    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 736    file_merge("foo", "bar")       => "bar"
 737
 738    In other words, it's a simpler and gentler version of uri_merge_1.  */
 739
 740 char *
 741 file_merge (const char *base, const char *file)
 742 {
 743   char *result;
 744   const char *cut = (const char *)strrchr (base, '/');
 745
 746   if (!cut)
 747     return xstrdup (file);
 748
 749   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 750   memcpy (result, base, cut - base);
 751   result[cut - base] = '/';
 752   strcpy (result + (cut - base) + 1, file);
 753
 754   return result;
 755 }
 756 \f
 757 static int in_acclist PARAMS ((const char *const *, const char *, int));
 758
 759 /* Determine whether a file is acceptable to be followed, according to
 760    lists of patterns to accept/reject.  */
 761 int
 762 acceptable (const char *s)
 763 {
 764   int l = strlen (s);
 765
 766   while (l && s[l] != '/')
 767     --l;
 768   if (s[l] == '/')
 769     s += (l + 1);
 770   if (opt.accepts)
 771     {
 772       if (opt.rejects)
 773         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 774                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 775       else
 776         return in_acclist ((const char *const *)opt.accepts, s, 1);
 777     }
 778   else if (opt.rejects)
 779     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 780   return 1;
 781 }
 782
 783 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 784    `/something', frontcmp() will return 1 only if S2 begins with
 785    `/something'.  Otherwise, 0 is returned.  */
 786 int
 787 frontcmp (const char *s1, const char *s2)
 788 {
 789   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 790   return !*s1;
 791 }
 792
 793 /* Iterate through STRLIST, and return the first element that matches
 794    S, through wildcards or front comparison (as appropriate).  */
 795 static char *
 796 proclist (char **strlist, const char *s, enum accd flags)
 797 {
 798   char **x;
 799
 800   for (x = strlist; *x; x++)
 801     if (has_wildcards_p (*x))
 802       {
 803         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 804           break;
 805       }
 806     else
 807       {
 808         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 809         if (frontcmp (p, s))
 810           break;
 811       }
 812   return *x;
 813 }
 814
 815 /* Returns whether DIRECTORY is acceptable for download, wrt the
 816    include/exclude lists.
 817
 818    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 819    and absolute paths may be freely intermixed.  */
 820 int
 821 accdir (const char *directory, enum accd flags)
 822 {
 823   /* Remove starting '/'.  */
 824   if (flags & ALLABS && *directory == '/')
 825     ++directory;
 826   if (opt.includes)
 827     {
 828       if (!proclist (opt.includes, directory, flags))
 829         return 0;
 830     }
 831   if (opt.excludes)
 832     {
 833       if (proclist (opt.excludes, directory, flags))
 834         return 0;
 835     }
 836   return 1;
 837 }
 838
 839 /* Match the end of STRING against PATTERN.  For instance:
 840
 841    match_backwards ("abc", "bc") -> 1
 842    match_backwards ("abc", "ab") -> 0
 843    match_backwards ("abc", "abc") -> 1 */
 844 int
 845 match_tail (const char *string, const char *pattern)
 846 {
 847   int i, j;
 848
 849   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 850     if (string[i] != pattern[j])
 851       break;
 852   /* If the pattern was exhausted, the match was succesful.  */
 853   if (j == -1)
 854     return 1;
 855   else
 856     return 0;
 857 }
 858
 859 /* Checks whether string S matches each element of ACCEPTS.  A list
 860    element are matched either with fnmatch() or match_tail(),
 861    according to whether the element contains wildcards or not.
 862
 863    If the BACKWARD is 0, don't do backward comparison -- just compare
 864    them normally.  */
 865 static int
 866 in_acclist (const char *const *accepts, const char *s, int backward)
 867 {
 868   for (; *accepts; accepts++)
 869     {
 870       if (has_wildcards_p (*accepts))
 871         {
 872           /* fnmatch returns 0 if the pattern *does* match the
 873              string.  */
 874           if (fnmatch (*accepts, s, 0) == 0)
 875             return 1;
 876         }
 877       else
 878         {
 879           if (backward)
 880             {
 881               if (match_tail (s, *accepts))
 882                 return 1;
 883             }
 884           else
 885             {
 886               if (!strcmp (s, *accepts))
 887                 return 1;
 888             }
 889         }
 890     }
 891   return 0;
 892 }
 893
 894 /* Return the location of STR's suffix (file extension).  Examples:
 895    suffix ("foo.bar")       -> "bar"
 896    suffix ("foo.bar.baz")   -> "baz"
 897    suffix ("/foo/bar")      -> NULL
 898    suffix ("/foo.bar/baz")  -> NULL  */
 899 char *
 900 suffix (const char *str)
 901 {
 902   int i;
 903
 904   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 905     ;
 906
 907   if (str[i++] == '.')
 908     return (char *)str + i;
 909   else
 910     return NULL;
 911 }
 912
 913 /* Read a line from FP and return the pointer to freshly allocated
 914    storage.  The stoarage space is obtained through malloc() and
 915    should be freed with free() when it is no longer needed.
 916
 917    The length of the line is not limited, except by available memory.
 918    The newline character at the end of line is retained.  The line is
 919    terminated with a zero character.
 920
 921    After end-of-file is encountered without anything being read, NULL
 922    is returned.  NULL is also returned on error.  To distinguish
 923    between these two cases, use the stdio function ferror().  */
 924
 925 char *
 926 read_whole_line (FILE *fp)
 927 {
 928   int length = 0;
 929   int bufsize = 82;
 930   char *line = (char *)xmalloc (bufsize);
 931
 932   while (fgets (line + length, bufsize - length, fp))
 933     {
 934       length += strlen (line + length);
 935       if (length == 0)
 936         /* Possible for example when reading from a binary file where
 937            a line begins with \0.  */
 938         continue;
 939
 940       if (line[length - 1] == '\n')
 941         break;
 942
 943       /* fgets() guarantees to read the whole line, or to use up the
 944          space we've given it.  We can double the buffer
 945          unconditionally.  */
 946       bufsize <<= 1;
 947       line = xrealloc (line, bufsize);
 948     }
 949   if (length == 0 || ferror (fp))
 950     {
 951       xfree (line);
 952       return NULL;
 953     }
 954   if (length + 1 < bufsize)
 955     /* Relieve the memory from our exponential greediness.  We say
 956        `length + 1' because the terminating \0 is not included in
 957        LENGTH.  We don't need to zero-terminate the string ourselves,
 958        though, because fgets() does that.  */
 959     line = xrealloc (line, length + 1);
 960   return line;
 961 }
 962 \f
 963 /* Read FILE into memory.  A pointer to `struct file_memory' is
 964    returned; use struct element `content' to access file contents, and
 965    the element `length' to know the file length.  `content' is *not*
 966    zero-terminated, and you should *not* read or write beyond the [0,
 967    length) range of characters.
 968
 969    After you are done with the file contents, call read_file_free to
 970    release the memory.
 971
 972    Depending on the operating system and the type of file that is
 973    being read, read_file() either mmap's the file into memory, or
 974    reads the file into the core using read().
 975
 976    If file is named "-", fileno(stdin) is used for reading instead.
 977    If you want to read from a real file named "-", use "./-" instead.  */
 978
 979 struct file_memory *
 980 read_file (const char *file)
 981 {
 982   int fd;
 983   struct file_memory *fm;
 984   long size;
 985   int inhibit_close = 0;
 986
 987   /* Some magic in the finest tradition of Perl and its kin: if FILE
 988      is "-", just use stdin.  */
 989   if (HYPHENP (file))
 990     {
 991       fd = fileno (stdin);
 992       inhibit_close = 1;
 993       /* Note that we don't inhibit mmap() in this case.  If stdin is
 994          redirected from a regular file, mmap() will still work.  */
 995     }
 996   else
 997     fd = open (file, O_RDONLY);
 998   if (fd < 0)
 999     return NULL;
1000   fm = xmalloc (sizeof (struct file_memory));
1001
1002 #ifdef HAVE_MMAP
1003   {
1004     struct stat buf;
1005     if (fstat (fd, &buf) < 0)
1006       goto mmap_lose;
1007     fm->length = buf.st_size;
1008     /* NOTE: As far as I know, the callers of this function never
1009        modify the file text.  Relying on this would enable us to
1010        specify PROT_READ and MAP_SHARED for a marginal gain in
1011        efficiency, but at some cost to generality.  */
1012     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1013                         MAP_PRIVATE, fd, 0);
1014     if (fm->content == (char *)MAP_FAILED)
1015       goto mmap_lose;
1016     if (!inhibit_close)
1017       close (fd);
1018
1019     fm->mmap_p = 1;
1020     return fm;
1021   }
1022
1023  mmap_lose:
1024   /* The most common reason why mmap() fails is that FD does not point
1025      to a plain file.  However, it's also possible that mmap() doesn't
1026      work for a particular type of file.  Therefore, whenever mmap()
1027      fails, we just fall back to the regular method.  */
1028 #endif /* HAVE_MMAP */
1029
1030   fm->length = 0;
1031   size = 512;                   /* number of bytes fm->contents can
1032                                    hold at any given time. */
1033   fm->content = xmalloc (size);
1034   while (1)
1035     {
1036       long nread;
1037       if (fm->length > size / 2)
1038         {
1039           /* #### I'm not sure whether the whole exponential-growth
1040              thing makes sense with kernel read.  On Linux at least,
1041              read() refuses to read more than 4K from a file at a
1042              single chunk anyway.  But other Unixes might optimize it
1043              better, and it doesn't *hurt* anything, so I'm leaving
1044              it.  */
1045
1046           /* Normally, we grow SIZE exponentially to make the number
1047              of calls to read() and realloc() logarithmic in relation
1048              to file size.  However, read() can read an amount of data
1049              smaller than requested, and it would be unreasonably to
1050              double SIZE every time *something* was read.  Therefore,
1051              we double SIZE only when the length exceeds half of the
1052              entire allocated size.  */
1053           size <<= 1;
1054           fm->content = xrealloc (fm->content, size);
1055         }
1056       nread = read (fd, fm->content + fm->length, size - fm->length);
1057       if (nread > 0)
1058         /* Successful read. */
1059         fm->length += nread;
1060       else if (nread < 0)
1061         /* Error. */
1062         goto lose;
1063       else
1064         /* EOF */
1065         break;
1066     }
1067   if (!inhibit_close)
1068     close (fd);
1069   if (size > fm->length && fm->length != 0)
1070     /* Due to exponential growth of fm->content, the allocated region
1071        might be much larger than what is actually needed.  */
1072     fm->content = xrealloc (fm->content, fm->length);
1073   fm->mmap_p = 0;
1074   return fm;
1075
1076  lose:
1077   if (!inhibit_close)
1078     close (fd);
1079   xfree (fm->content);
1080   xfree (fm);
1081   return NULL;
1082 }
1083
1084 /* Release the resources held by FM.  Specifically, this calls
1085    munmap() or xfree() on fm->content, depending whether mmap or
1086    malloc/read were used to read in the file.  It also frees the
1087    memory needed to hold the FM structure itself.  */
1088
1089 void
1090 read_file_free (struct file_memory *fm)
1091 {
1092 #ifdef HAVE_MMAP
1093   if (fm->mmap_p)
1094     {
1095       munmap (fm->content, fm->length);
1096     }
1097   else
1098 #endif
1099     {
1100       xfree (fm->content);
1101     }
1102   xfree (fm);
1103 }
1104 \f
/* Release a NULL-terminated vector of pointers: every element is
   freed first, then the vector itself.  A NULL VEC is ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;

  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1118
/* Append vector V2 to vector V1.  Both vectors are NULL-terminated
   arrays of pointers.  The function frees V2 and reallocates V1, so
   neither of the original pointers may be used after the call; use
   the returned pointer instead.

   If V1 is NULL, V2 is returned unchanged.  If V2 is NULL, V1 is
   returned unchanged.  If V2 is empty, it is freed and V1 is
   returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++);
  /* Count v2.  */
  for (j = 0; v2[j]; j++);
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The element size is taken from *v1 so that it always
     matches the type actually stored in the vector.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (*v1));
  /* Copy v2's elements together with its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (*v2));
  xfree (v2);
  return v1;
}
1147
1148 /* A set of simple-minded routines to store strings in a linked list.
1149    This used to also be used for searching, but now we have hash
1150    tables for that.  */
1151
/* It's a shame that these simple things like linked lists and hash
   tables (see hash.c) need to be implemented over and over again.  It
   would be nice to be able to use the routines from glib -- see
   www.gtk.org for details.  However, that would make Wget depend on
   glib, and I want to avoid dependencies on external libraries for
   reasons of convenience and portability (I suspect Wget is more
   portable than anything ever written for Gnome).  */
1159
1160 /* Append an element to the list.  If the list has a huge number of
1161    elements, this can get slow because it has to find the list's
1162    ending.  If you think you have to call slist_append in a loop,
1163    think about calling slist_prepend() followed by slist_nreverse().  */
1164
1165 slist *
1166 slist_append (slist *l, const char *s)
1167 {
1168   slist *newel = (slist *)xmalloc (sizeof (slist));
1169   slist *beg = l;
1170
1171   newel->string = xstrdup (s);
1172   newel->next = NULL;
1173
1174   if (!l)
1175     return newel;
1176   /* Find the last element.  */
1177   while (l->next)
1178     l = l->next;
1179   l->next = newel;
1180   return beg;
1181 }
1182
1183 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1184
1185 slist *
1186 slist_prepend (slist *l, const char *s)
1187 {
1188   slist *newel = (slist *)xmalloc (sizeof (slist));
1189   newel->string = xstrdup (s);
1190   newel->next = l;
1191   return newel;
1192 }
1193
1194 /* Destructively reverse L. */
1195
1196 slist *
1197 slist_nreverse (slist *l)
1198 {
1199   slist *prev = NULL;
1200   while (l)
1201     {
1202       slist *next = l->next;
1203       l->next = prev;
1204       prev = l;
1205       l = next;
1206     }
1207   return prev;
1208 }
1209
1210 /* Is there a specific entry in the list?  */
1211 int
1212 slist_contains (slist *l, const char *s)
1213 {
1214   for (; l; l = l->next)
1215     if (!strcmp (l->string, s))
1216       return 1;
1217   return 0;
1218 }
1219
1220 /* Free the whole slist.  */
1221 void
1222 slist_free (slist *l)
1223 {
1224   while (l)
1225     {
1226       slist *n = l->next;
1227       xfree (l->string);
1228       xfree (l);
1229       l = n;
1230     }
1231 }
1232 \f
1233 /* Sometimes it's useful to create "sets" of strings, i.e. special
1234    hash tables where you want to store strings as keys and merely
1235    query for their existence.  Here is a set of utility routines that
1236    makes that transparent.  */
1237
/* Add string S to the set HT.  Nothing happens if S is already a
   member, which saves freeing the old key and duplicating a new
   one.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* The value stored with every key is the literal "1": it is a
     clear arbitrary marker and costs no extra memory, because the
     same literal is shared by all entries of all `set' tables.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1253
/* Set membership test; simply forwards to hash_table_contains and
   returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1261
/* Mapper used by string_set_free: release the KEY of one entry.  The
   value and the extra argument are not used.  Always returns 0.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  (void) value_ignored;
  (void) arg_ignored;

  xfree (key);
  return 0;
}
1268
1269 void
1270 string_set_free (struct hash_table *ht)
1271 {
1272   hash_table_map (ht, string_set_free_mapper, NULL);
1273   hash_table_destroy (ht);
1274 }
1275
/* Mapper used by free_keys_and_values: release both the KEY and the
   VALUE of one entry.  The extra argument is not used.  Always
   returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  (void) arg_ignored;

  xfree (key);
  xfree (value);
  return 0;
}
1283
1284 /* Another utility function: call free() on all keys and values of HT.  */
1285
1286 void
1287 free_keys_and_values (struct hash_table *ht)
1288 {
1289   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1290 }
1291
1292 \f
/* Shared worker for legible and legible_very_long.  REPR is the
   textual form of a number, optionally starting with `-'.  The result
   is the same text with a `,' inserted between groups of three
   characters counted from the right, e.g. "1234567" -> "1,234,567".

   The result lives in a static buffer that is overwritten by the next
   call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *digits = repr;
  int ndigits, pos;

  /* Copy the sign, if any, and work on the digits that follow it.  */
  if (*digits == '-')
    {
      *dst++ = '-';
      ++digits;
    }

  ndigits = strlen (digits);
  for (pos = 0; pos < ndigits; pos++)
    {
      /* A separator goes before every character that has a multiple
         of three characters remaining from it to the end, except at
         the very beginning.  */
      if (pos > 0 && (ndigits - pos) % 3 == 0)
        *dst++ = ',';
      *dst++ = digits[pos];
    }

  *dst = '\0';
  return outbuf;
}
1330
/* Return L printed with thousands separators.  The result is the
   static buffer filled in by legible_1, so it is overwritten by the
   next call.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert to plain decimal first, then let legible_1 group it.  */
  number_to_string (digits, l);
  return legible_1 (digits);
}
1340
1341 /* Write a string representation of NUMBER into the provided buffer.
1342    We cannot use sprintf() because we cannot be sure whether the
1343    platform supports printing of what we chose for VERY_LONG_TYPE.
1344
1345    Example: Gcc supports `long long' under many platforms, but on many
1346    of those the native libc knows nothing of it and therefore cannot
1347    print it.
1348
1349    How long BUFFER needs to be depends on the platform and the content
1350    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1351    bytes are sufficient.  Using more might be a good idea.
1352
1353    This function does not go through the hoops that long_to_string
1354    goes to because it doesn't aspire to be fast.  (It's called perhaps
1355    once in a Wget run.)  */
1356
1357 static void
1358 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1359 {
1360   int i = 0;
1361   int j;
1362
1363   /* Print the number backwards... */
1364   do
1365     {
1366       buffer[i++] = '0' + number % 10;
1367       number /= 10;
1368     }
1369   while (number);
1370
1371   /* ...and reverse the order of the digits. */
1372   for (j = 0; j < i / 2; j++)
1373     {
1374       char c = buffer[j];
1375       buffer[j] = buffer[i - 1 - j];
1376       buffer[i - 1 - j] = c;
1377     }
1378   buffer[i] = '\0';
1379 }
1380
1381 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1382 char *
1383 legible_very_long (VERY_LONG_TYPE l)
1384 {
1385   char inbuf[128];
1386   /* Print the number into the buffer.  */
1387   very_long_to_string (inbuf, l);
1388   return legible_1 (inbuf);
1389 }
1390
/* Count the characters needed to print NUMBER in base 10: one per
   digit, plus one for the `-' sign if NUMBER is negative.

   The number is never negated, so the result is also correct for the
   most negative long, whose magnitude is not representable as a
   long.  */
int
numdigit (long number)
{
  int cnt = 1;
  if (number < 0)
    /* Account for the minus sign.  */
    ++cnt;
  /* Dividing moves the value toward zero regardless of its sign, so
     one loop serves both positive and negative numbers.  */
  while ((number /= 10) != 0)
    ++cnt;
  return cnt;
}
1405
/* Helpers for number_to_string.  They operate on the local variables
   `p' (output cursor) and `n' (remaining magnitude) of that function.

   ONE_DIGIT stores the leading digit of N, given that FIGURE is the
   power of ten matching N's magnitude.  ONE_DIGIT_ADVANCE does the
   same and then strips that digit from N.  DIGITS_<k> prints exactly
   K digits by chaining these.  */

#define ONE_DIGIT(figure) *p++ = (char) (n / (figure) + '0')
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This should be completely
   equivalent to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be big enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit longs the maximum
   needed size is 24 bytes.  That includes the digits needed for the
   largest 64-bit number, the `-' sign in case it's negative, and the
   terminating '\0'.  */

char *
number_to_string (char *buffer, long number)
{
  char *p = buffer;

#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
  p += strlen (buffer);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  /* The magnitude is kept in an unsigned long: negating the most
     negative long overflows in signed arithmetic, whereas unsigned
     negation is well defined and yields the right magnitude.  */
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      n = -(unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */

  return p;
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1522 \f
/* Support for timers. */

/* Start from a clean slate so that exactly one back end gets
   selected below.  */
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Depending on the OS and availability of gettimeofday(), one and
   only one of the above constants will be defined.  Virtually all
   modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
   use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
   non-Windows systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else  /* neither WINDOWS nor HAVE_GETTIMEOFDAY */
# define TIMER_TIME
#endif
1548
/* State of one timer: the moment recorded by the last
   wtimer_reset().  The representation depends on which TIMER_*
   back end is selected.  */
struct wget_timer {
#if defined (TIMER_WINDOWS)
  /* Filled from a FILETIME by wtimer_reset.  */
  ULARGE_INTEGER wintime;
#endif

#if defined (TIMER_GETTIMEOFDAY)
  /* The tv_sec and tv_usec fields reported by gettimeofday().  */
  long secs;
  long usecs;
#endif

#if defined (TIMER_TIME)
  /* Value returned by time().  */
  time_t secs;
#endif
};
1563
1564 /* Allocate a timer.  It is not legal to do anything with a freshly
1565    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1566
1567 struct wget_timer *
1568 wtimer_allocate (void)
1569 {
1570   struct wget_timer *wt =
1571     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1572   return wt;
1573 }
1574
/* Convenience constructor: allocate a timer, reset it so that it
   starts counting from now, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1584
/* Release timer WT.  The timer must not be used in any way after
   this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  /* The timer owns nothing besides its own storage.  */
  xfree (wt);
}
1593
/* Record the current time in WT.  Subsequent calls to
   wtimer_elapsed() report the milliseconds passed since this moment.
   A timer that has already been used may be reset again.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;
  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#endif

#if defined (TIMER_TIME)
  wt->secs = time (NULL);
#endif

#if defined (TIMER_WINDOWS)
  /* Obtain the system time and store it in FILETIME form, split
     into the two halves of the ULARGE_INTEGER.  */
  FILETIME filetime;
  SYSTEMTIME systime;
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.HighPart = filetime.dwHighDateTime;
  wt->wintime.LowPart  = filetime.dwLowDateTime;
#endif
}
1621
/* Return the number of milliseconds elapsed since the timer was last
   reset.  It is allowed to call this function more than once to get
   increasingly higher elapsed values.

   Each branch below matches the representation stored by
   wtimer_reset() for the same TIMER_* back end.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds and the microsecond remainder are converted to
     milliseconds separately and then added.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* The difference between the two stored time values is divided by
     10000 to obtain milliseconds.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1651
/* Report, in milliseconds, how fine-grained the timer in use is
   assumed to be.  Code that has to cope with "zero" time intervals
   relies on this value.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* Granularity of gettimeofday is hugely architecture-dependent.
     However, it appears that on modern machines it is better than
     1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1676 \f
1677 /* This should probably be at a better place, but it doesn't really
1678    fit into html-parse.c.  */
1679
/* Return the entity that replaces character C in quoted HTML, or
   NULL if C is copied unchanged.  */
static const char *
html_quote_entity (int c)
{
  switch (c)
    {
    case '&':
      return "&amp;";
    case '<':
      return "&lt;";
    case '>':
      return "&gt;";
    case '\"':
      return "&quot;";
    case ' ':
      return "&#32;";
    default:
      return NULL;
    }
}

/* Return a freshly malloc-ed copy of string S in which the following
   characters are replaced with entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *res, *dst;
  int size = 0;

  /* First pass: compute the size of the quoted string.  */
  for (src = s; *src; src++)
    {
      const char *entity = html_quote_entity (*src);
      size += entity ? (int) strlen (entity) : 1;
    }

  res = (char *)xmalloc (size + 1);

  /* Second pass: produce the quoted text.  */
  dst = res;
  for (src = s; *src; src++)
    {
      const char *entity = html_quote_entity (*src);
      if (entity)
        {
          size_t len = strlen (entity);
          memcpy (dst, entity, len);
          dst += len;
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';
  return res;
}
1751
1752 /* Determine the width of the terminal we're running on.  If that's
1753    not possible, return 0.  */
1754
1755 int
1756 determine_screen_width (void)
1757 {
1758   /* If there's a way to get the terminal size using POSIX
1759      tcgetattr(), somebody please tell me.  */
1760 #ifndef TIOCGWINSZ
1761   return 0;
1762 #else  /* TIOCGWINSZ */
1763   int fd;
1764   struct winsize wsz;
1765
1766   if (opt.lfilename != NULL)
1767     return 0;
1768
1769   fd = fileno (stderr);
1770   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1771     return 0;                   /* most likely ENOTTY */
1772
1773   return wsz.ws_col;
1774 #endif /* TIOCGWINSZ */
1775 }
1776
#if 0
/* Debugging aid for verifying that the MD5 library works: return the
   MD5 digest of the string BUF as 32 hexadecimal characters.  The
   result is stored in a static buffer.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];
  static char res[33];
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Each digest byte becomes two hex characters: the high nibble
     first, then the low one.  */
  for (i = 0; i < 16; i++)
    {
      res[2 * i]     = XDIGIT_TO_xchar (raw[i] >> 4);
      res[2 * i + 1] = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  res[32] = '\0';

  return res;
}
#endif
1810
1811 #if 0
1812 /* Debugging and testing support for path_simplify. */
1813
/* Debugger helper: apply path_simplify to a private copy of PATH and
   return that copy, leaving PATH itself untouched.  The caller owns
   the returned string.  */
static char *
ps (char *path)
{
  char *simplified = xstrdup (path);

  path_simplify (simplified);
  return simplified;
}
1823
/* Run path_simplify on a copy of TEST and report discrepancies on
   stdout.  EXPECTED_RESULT is the string path_simplify should
   produce; EXPECTED_CHANGE is the value it should return (1 if the
   path is expected to be modified, 0 otherwise).  Nothing is printed
   when both match.  */
static void
run_test (char *test, char *expected_result, int expected_change)
{
  char *test_copy = xstrdup (test);
  int modified = path_simplify (test_copy);

  if (0 != strcmp (test_copy, expected_result))
    {
      printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
              test, expected_result, test_copy);
    }
  if (modified != expected_change)
    {
      /* The message states what was expected, which is the opposite
         of what path_simplify actually reported.  */
      if (expected_change == 1)
        printf ("Expected modification with path_simplify(\"%s\").\n",
                test);
      else
        printf ("Expected no modification with path_simplify(\"%s\").\n",
                test);
    }
  xfree (test_copy);
}
1846
/* Exercise path_simplify with a table of inputs, expected outputs
   and expected "was modified" flags.  Every case is run twice: once
   as written, and once with a `/' prepended to both the input and
   the expected output.  Failures are reported by run_test.  */
static void
test_path_simplify (void)
{
  static struct {
    char *test, *result;
    int should_modify;
  } tests[] = {
    { "",               "",             0 },
    { ".",              "",             1 },
    { "..",             "",             1 },
    { "foo",            "foo",          0 },
    { "foo/bar",        "foo/bar",      0 },
    { "foo///bar",      "foo/bar",      1 },
    { "foo/.",          "foo/",         1 },
    { "foo/./",         "foo/",         1 },
    { "foo./",          "foo./",        0 },
    { "foo/../bar",     "bar",          1 },
    { "foo/../bar/",    "bar/",         1 },
    { "foo/bar/..",     "foo/",         1 },
    { "foo/bar/../x",   "foo/x",        1 },
    { "foo/bar/../x/",  "foo/x/",       1 },
    { "foo/..",         "",             1 },
    { "foo/../..",      "",             1 },
    { "a/b/../../c",    "c",            1 },
    { "./a/../b",       "b",            1 }
  };
  int i;

  /* First pass: the cases exactly as listed.  */
  for (i = 0; i < ARRAY_SIZE (tests); i++)
    run_test (tests[i].test, tests[i].result, tests[i].should_modify);

  /* Second pass: the same cases with a leading slash, to prove that
     the slash is being preserved.  */
  for (i = 0; i < ARRAY_SIZE (tests); i++)
    {
      /* Room for the slash, the text, and the terminating zero.  */
      char *slashed_test = xmalloc (1 + strlen (tests[i].test) + 1);
      char *slashed_result = xmalloc (1 + strlen (tests[i].result) + 1);

      sprintf (slashed_test, "/%s", tests[i].test);
      sprintf (slashed_result, "/%s", tests[i].result);

      run_test (slashed_test, slashed_result, tests[i].should_modify);

      xfree (slashed_test);
      xfree (slashed_result);
    }
}
1902 #endif