sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21 In addition, as a special exception, the Free Software Foundation
  22 gives permission to link the code of its release of Wget with the
  23 OpenSSL project's "OpenSSL" library (or with modified versions of it
  24 that use the same license as the "OpenSSL" library), and distribute
  25 the linked executables.  You must obey the GNU General Public License
  26 in all respects for all of the code used other than "OpenSSL".  If you
  27 modify this file, you may extend this exception to your version of the
  28 file, but you are not obligated to do so.  If you do not wish to do
  29 so, delete this exception statement from your version.  */
  30
  31 #include <config.h>
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #ifdef HAVE_STRING_H
  36 # include <string.h>
  37 #else  /* not HAVE_STRING_H */
  38 # include <strings.h>
  39 #endif /* not HAVE_STRING_H */
  40 #include <sys/types.h>
  41 #ifdef HAVE_UNISTD_H
  42 # include <unistd.h>
  43 #endif
  44 #ifdef HAVE_MMAP
  45 # include <sys/mman.h>
  46 #endif
  47 #ifdef HAVE_PWD_H
  48 # include <pwd.h>
  49 #endif
  50 #include <limits.h>
  51 #ifdef HAVE_UTIME_H
  52 # include <utime.h>
  53 #endif
  54 #ifdef HAVE_SYS_UTIME_H
  55 # include <sys/utime.h>
  56 #endif
  57 #include <errno.h>
  58 #ifdef NeXT
  59 # include <libc.h>              /* for access() */
  60 #endif
  61 #include <fcntl.h>
  62 #include <assert.h>
  63
  64 /* For TIOCGWINSZ and friends: */
  65 #ifdef HAVE_SYS_IOCTL_H
  66 # include <sys/ioctl.h>
  67 #endif
  68 #ifdef HAVE_TERMIOS_H
  69 # include <termios.h>
  70 #endif
  71
  72 /* Needed for run_with_timeout. */
  73 #undef USE_SIGNAL_TIMEOUT
  74 #ifdef HAVE_SIGNAL_H
  75 # include <signal.h>
  76 #endif
  77 #ifdef HAVE_SETJMP_H
  78 # include <setjmp.h>
  79 #endif
  80
  81 #ifndef HAVE_SIGSETJMP
  82 /* If sigsetjmp is a macro, configure won't pick it up. */
  83 # ifdef sigsetjmp
  84 #  define HAVE_SIGSETJMP
  85 # endif
  86 #endif
  87
  88 #ifdef HAVE_SIGNAL
  89 # ifdef HAVE_SIGSETJMP
  90 #  define USE_SIGNAL_TIMEOUT
  91 # endif
  92 # ifdef HAVE_SIGBLOCK
  93 #  define USE_SIGNAL_TIMEOUT
  94 # endif
  95 #endif
  96
  97 #include "wget.h"
  98 #include "utils.h"
  99 #include "hash.h"
 100
 101 #ifndef errno
 102 extern int errno;
 103 #endif
 104
 105 /* This section implements several wrappers around the basic
 106    allocation routines.  This is done for two reasons: first, so that
 107    the callers of these functions need not consistently check for
 108    errors.  If there is not enough virtual memory for running Wget,
 109    something is seriously wrong, and Wget exits with an appropriate
 110    error message.
 111
 112    The second reason why these are useful is that, if DEBUG_MALLOC is
 113    defined, they also provide a handy (if crude) malloc debugging
 114    interface that checks memory leaks.  */
 115
 116 /* Croak the fatal memory error and bail out with non-zero exit
 117    status.  */
 118 static void
 119 memfatal (const char *what)
 120 {
 121   /* Make sure we don't try to store part of the log line, and thus
 122      call malloc.  */
 123   log_set_save_context (0);
 124   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
 125   exit (1);
 126 }
 127
 128 /* These functions end with _real because they need to be
 129    distinguished from the debugging functions, and from the macros.
 130    Explanation follows:
 131
 132    If memory debugging is not turned on, wget.h defines these:
 133
 134      #define xmalloc xmalloc_real
 135      #define xrealloc xrealloc_real
 136      #define xstrdup xstrdup_real
 137      #define xfree free
 138
 139    In case of memory debugging, the definitions are a bit more
 140    complex, because we want to provide more information, *and* we want
 141    to call the debugging code.  (The former is the reason why xmalloc
 142    and friends need to be macros in the first place.)  Then it looks
 143    like this:
 144
 145      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 146      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 147      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 148      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 149
 150    Each of the *_debug function does its magic and calls the real one.  */
 151
 152 #ifdef DEBUG_MALLOC
 153 # define STATIC_IF_DEBUG static
 154 #else
 155 # define STATIC_IF_DEBUG
 156 #endif
 157
 158 STATIC_IF_DEBUG void *
 159 xmalloc_real (size_t size)
 160 {
 161   void *ptr = malloc (size);
 162   if (!ptr)
 163     memfatal ("malloc");
 164   return ptr;
 165 }
 166
 167 STATIC_IF_DEBUG void *
 168 xrealloc_real (void *ptr, size_t newsize)
 169 {
 170   void *newptr;
 171
 172   /* Not all Un*xes have the feature of realloc() that calling it with
 173      a NULL-pointer is the same as malloc(), but it is easy to
 174      simulate.  */
 175   if (ptr)
 176     newptr = realloc (ptr, newsize);
 177   else
 178     newptr = malloc (newsize);
 179   if (!newptr)
 180     memfatal ("realloc");
 181   return newptr;
 182 }
 183
 184 STATIC_IF_DEBUG char *
 185 xstrdup_real (const char *s)
 186 {
 187   char *copy;
 188
 189 #ifndef HAVE_STRDUP
 190   int l = strlen (s);
 191   copy = malloc (l + 1);
 192   if (!copy)
 193     memfatal ("strdup");
 194   memcpy (copy, s, l + 1);
 195 #else  /* HAVE_STRDUP */
 196   copy = strdup (s);
 197   if (!copy)
 198     memfatal ("strdup");
 199 #endif /* HAVE_STRDUP */
 200
 201   return copy;
 202 }
 203
 204 #ifdef DEBUG_MALLOC
 205
 206 /* Crude home-grown routines for debugging some malloc-related
 207    problems.  Featured:
 208
 209    * Counting the number of malloc and free invocations, and reporting
 210      the "balance", i.e. how many times more malloc was called than it
 211      was the case with free.
 212
 213    * Making malloc store its entry into a simple array and free remove
 214      stuff from that array.  At the end, print the pointers which have
 215      not been freed, along with the source file and the line number.
 216      This also has the side-effect of detecting freeing memory that
 217      was never allocated.
 218
 219    Note that this kind of memory leak checking strongly depends on
 220    every malloc() being followed by a free(), even if the program is
 221    about to finish.  Wget is careful to free the data structure it
 222    allocated in init.c.  */
 223
 224 static int malloc_count, free_count;
 225
 226 static struct {
 227   char *ptr;
 228   const char *file;
 229   int line;
 230 } malloc_debug[100000];
 231
 232 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 233    which can be a real problem.  It would be nice to use a hash table
 234    for malloc_debug, but the functions in hash.c are not suitable
 235    because they can call malloc() themselves.  Maybe it would work if
 236    the hash table were preallocated to a huge size, and if we set the
 237    rehash threshold to 1.0.  */
 238
 239 /* Register PTR in malloc_debug.  Abort if this is not possible
 240    (presumably due to the number of current allocations exceeding the
 241    size of malloc_debug.)  */
 242
 243 static void
 244 register_ptr (void *ptr, const char *file, int line)
 245 {
 246   int i;
 247   for (i = 0; i < countof (malloc_debug); i++)
 248     if (malloc_debug[i].ptr == NULL)
 249       {
 250         malloc_debug[i].ptr = ptr;
 251         malloc_debug[i].file = file;
 252         malloc_debug[i].line = line;
 253         return;
 254       }
 255   abort ();
 256 }
 257
 258 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 259    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 260
 261 static void
 262 unregister_ptr (void *ptr)
 263 {
 264   int i;
 265   for (i = 0; i < countof (malloc_debug); i++)
 266     if (malloc_debug[i].ptr == ptr)
 267       {
 268         malloc_debug[i].ptr = NULL;
 269         return;
 270       }
 271   abort ();
 272 }
 273
 274 /* Print the malloc debug stats that can be gathered from the above
 275    information.  Currently this is the count of mallocs, frees, the
 276    difference between the two, and the dump of the contents of
 277    malloc_debug.  The last part are the memory leaks.  */
 278
 279 void
 280 print_malloc_debug_stats (void)
 281 {
 282   int i;
 283   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 284           malloc_count, free_count, malloc_count - free_count);
 285   for (i = 0; i < countof (malloc_debug); i++)
 286     if (malloc_debug[i].ptr != NULL)
 287       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 288               malloc_debug[i].file, malloc_debug[i].line);
 289 }
 290
 291 void *
 292 xmalloc_debug (size_t size, const char *source_file, int source_line)
 293 {
 294   void *ptr = xmalloc_real (size);
 295   ++malloc_count;
 296   register_ptr (ptr, source_file, source_line);
 297   return ptr;
 298 }
 299
 300 void
 301 xfree_debug (void *ptr, const char *source_file, int source_line)
 302 {
 303   assert (ptr != NULL);
 304   ++free_count;
 305   unregister_ptr (ptr);
 306   free (ptr);
 307 }
 308
 309 void *
 310 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 311 {
 312   void *newptr = xrealloc_real (ptr, newsize);
 313   if (!ptr)
 314     {
 315       ++malloc_count;
 316       register_ptr (newptr, source_file, source_line);
 317     }
 318   else if (newptr != ptr)
 319     {
 320       unregister_ptr (ptr);
 321       register_ptr (newptr, source_file, source_line);
 322     }
 323   return newptr;
 324 }
 325
 326 char *
 327 xstrdup_debug (const char *s, const char *source_file, int source_line)
 328 {
 329   char *copy = xstrdup_real (s);
 330   ++malloc_count;
 331   register_ptr (copy, source_file, source_line);
 332   return copy;
 333 }
 334
 335 #endif /* DEBUG_MALLOC */
 336 \f
 337 /* Utility function: like xstrdup(), but also lowercases S.  */
 338
 339 char *
 340 xstrdup_lower (const char *s)
 341 {
 342   char *copy = xstrdup (s);
 343   char *p = copy;
 344   for (; *p; p++)
 345     *p = TOLOWER (*p);
 346   return copy;
 347 }
 348
 349 /* Return a count of how many times CHR occurs in STRING. */
 350
 351 int
 352 count_char (const char *string, char chr)
 353 {
 354   const char *p;
 355   int count = 0;
 356   for (p = string; *p; p++)
 357     if (*p == chr)
 358       ++count;
 359   return count;
 360 }
 361
 362 /* Copy the string formed by two pointers (one on the beginning, other
 363    on the char after the last char) to a new, malloc-ed location.
 364    0-terminate it.  */
 365 char *
 366 strdupdelim (const char *beg, const char *end)
 367 {
 368   char *res = (char *)xmalloc (end - beg + 1);
 369   memcpy (res, beg, end - beg);
 370   res[end - beg] = '\0';
 371   return res;
 372 }
 373
 374 /* Parse a string containing comma-separated elements, and return a
 375    vector of char pointers with the elements.  Spaces following the
 376    commas are ignored.  */
 377 char **
 378 sepstring (const char *s)
 379 {
 380   char **res;
 381   const char *p;
 382   int i = 0;
 383
 384   if (!s || !*s)
 385     return NULL;
 386   res = NULL;
 387   p = s;
 388   while (*s)
 389     {
 390       if (*s == ',')
 391         {
 392           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 393           res[i] = strdupdelim (p, s);
 394           res[++i] = NULL;
 395           ++s;
 396           /* Skip the blanks following the ','.  */
 397           while (ISSPACE (*s))
 398             ++s;
 399           p = s;
 400         }
 401       else
 402         ++s;
 403     }
 404   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 405   res[i] = strdupdelim (p, s);
 406   res[i + 1] = NULL;
 407   return res;
 408 }
 409 \f
 410 /* Return pointer to a static char[] buffer in which zero-terminated
 411    string-representation of TM (in form hh:mm:ss) is printed.
 412
 413    If TM is non-NULL, the current time-in-seconds will be stored
 414    there.
 415
 416    (#### This is misleading: one would expect TM would be used instead
 417    of the current time in that case.  This design was probably
 418    influenced by the design time(2), and should be changed at some
 419    points.  No callers use non-NULL TM anyway.)  */
 420
 421 char *
 422 time_str (time_t *tm)
 423 {
 424   static char output[15];
 425   struct tm *ptm;
 426   time_t secs = time (tm);
 427
 428   if (secs == -1)
 429     {
 430       /* In case of error, return the empty string.  Maybe we should
 431          just abort if this happens?  */
 432       *output = '\0';
 433       return output;
 434     }
 435   ptm = localtime (&secs);
 436   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 437   return output;
 438 }
 439
 440 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 441
 442 char *
 443 datetime_str (time_t *tm)
 444 {
 445   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 446   struct tm *ptm;
 447   time_t secs = time (tm);
 448
 449   if (secs == -1)
 450     {
 451       /* In case of error, return the empty string.  Maybe we should
 452          just abort if this happens?  */
 453       *output = '\0';
 454       return output;
 455     }
 456   ptm = localtime (&secs);
 457   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 458            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 459            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 460   return output;
 461 }
 462 \f
 463 /* The Windows versions of the following two functions are defined in
 464    mswindows.c.  */
 465
 466 #ifndef WINDOWS
 467 void
 468 fork_to_background (void)
 469 {
 470   pid_t pid;
 471   /* Whether we arrange our own version of opt.lfilename here.  */
 472   int changedp = 0;
 473
 474   if (!opt.lfilename)
 475     {
 476       opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
 477       changedp = 1;
 478     }
 479   pid = fork ();
 480   if (pid < 0)
 481     {
 482       /* parent, error */
 483       perror ("fork");
 484       exit (1);
 485     }
 486   else if (pid != 0)
 487     {
 488       /* parent, no error */
 489       printf (_("Continuing in background, pid %d.\n"), (int)pid);
 490       if (changedp)
 491         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 492       exit (0);                 /* #### should we use _exit()? */
 493     }
 494
 495   /* child: give up the privileges and keep running. */
 496   setsid ();
 497   freopen ("/dev/null", "r", stdin);
 498   freopen ("/dev/null", "w", stdout);
 499   freopen ("/dev/null", "w", stderr);
 500 }
 501 #endif /* not WINDOWS */
 502 \f
 503 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 504    specified with TM.  */
 505 void
 506 touch (const char *file, time_t tm)
 507 {
 508 #ifdef HAVE_STRUCT_UTIMBUF
 509   struct utimbuf times;
 510   times.actime = times.modtime = tm;
 511 #else
 512   time_t times[2];
 513   times[0] = times[1] = tm;
 514 #endif
 515
 516   if (utime (file, &times) == -1)
 517     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 518 }
 519
 520 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 521    nothing under MS-Windows.  */
 522 int
 523 remove_link (const char *file)
 524 {
 525   int err = 0;
 526   struct stat st;
 527
 528   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 529     {
 530       DEBUGP (("Unlinking %s (symlink).\n", file));
 531       err = unlink (file);
 532       if (err != 0)
 533         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 534                    file, strerror (errno));
 535     }
 536   return err;
 537 }
 538
 539 /* Does FILENAME exist?  This is quite a lousy implementation, since
 540    it supplies no error codes -- only a yes-or-no answer.  Thus it
 541    will return that a file does not exist if, e.g., the directory is
 542    unreadable.  I don't mind it too much currently, though.  The
 543    proper way should, of course, be to have a third, error state,
 544    other than true/false, but that would introduce uncalled-for
 545    additional complexity to the callers.  */
 546 int
 547 file_exists_p (const char *filename)
 548 {
 549 #ifdef HAVE_ACCESS
 550   return access (filename, F_OK) >= 0;
 551 #else
 552   struct stat buf;
 553   return stat (filename, &buf) >= 0;
 554 #endif
 555 }
 556
 557 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 558    Returns 0 on error.  */
 559 int
 560 file_non_directory_p (const char *path)
 561 {
 562   struct stat buf;
 563   /* Use lstat() rather than stat() so that symbolic links pointing to
 564      directories can be identified correctly.  */
 565   if (lstat (path, &buf) != 0)
 566     return 0;
 567   return S_ISDIR (buf.st_mode) ? 0 : 1;
 568 }
 569
 570 /* Return the size of file named by FILENAME, or -1 if it cannot be
 571    opened or seeked into. */
 572 long
 573 file_size (const char *filename)
 574 {
 575   long size;
 576   /* We use fseek rather than stat to determine the file size because
 577      that way we can also verify whether the file is readable.
 578      Inspired by the POST patch by Arnaud Wylie.  */
 579   FILE *fp = fopen (filename, "rb");
 580   if (!fp)
 581     return -1;
 582   fseek (fp, 0, SEEK_END);
 583   size = ftell (fp);
 584   fclose (fp);
 585   return size;
 586 }
 587
 588 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 589    doesn't exist is found.  Return a freshly allocated copy of the
 590    unused file name.  */
 591
 592 static char *
 593 unique_name_1 (const char *prefix)
 594 {
 595   int count = 1;
 596   int plen = strlen (prefix);
 597   char *template = (char *)alloca (plen + 1 + 24);
 598   char *template_tail = template + plen;
 599
 600   memcpy (template, prefix, plen);
 601   *template_tail++ = '.';
 602
 603   do
 604     number_to_string (template_tail, count++);
 605   while (file_exists_p (template));
 606
 607   return xstrdup (template);
 608 }
 609
 610 /* Return a unique file name, based on FILE.
 611
 612    More precisely, if FILE doesn't exist, it is returned unmodified.
 613    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 614    file name that doesn't exist is returned.
 615
 616    The resulting file is not created, only verified that it didn't
 617    exist at the point in time when the function was called.
 618    Therefore, where security matters, don't rely that the file created
 619    by this function exists until you open it with O_EXCL or
 620    something.
 621
 622    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 623    string.  Otherwise, it may return FILE if the file doesn't exist
 624    (and therefore doesn't need changing).  */
 625
 626 char *
 627 unique_name (const char *file, int allow_passthrough)
 628 {
 629   /* If the FILE itself doesn't exist, return it without
 630      modification. */
 631   if (!file_exists_p (file))
 632     return allow_passthrough ? (char *)file : xstrdup (file);
 633
 634   /* Otherwise, find a numeric suffix that results in unused file name
 635      and return it.  */
 636   return unique_name_1 (file);
 637 }
 638 \f
 639 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 640    are missing, create them first.  In case any mkdir() call fails,
 641    return its error status.  Returns 0 on successful completion.
 642
 643    The behaviour of this function should be identical to the behaviour
 644    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 645 int
 646 make_directory (const char *directory)
 647 {
 648   int quit = 0;
 649   int i;
 650   int ret = 0;
 651   char *dir;
 652
 653   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 654      function is unsafe if called with a read-only char *argument.  */
 655   STRDUP_ALLOCA (dir, directory);
 656
 657   /* If the first character of dir is '/', skip it (and thus enable
 658      creation of absolute-pathname directories.  */
 659   for (i = (*dir == '/'); 1; ++i)
 660     {
 661       for (; dir[i] && dir[i] != '/'; i++)
 662         ;
 663       if (!dir[i])
 664         quit = 1;
 665       dir[i] = '\0';
 666       /* Check whether the directory already exists.  Allow creation of
 667          of intermediate directories to fail, as the initial path components
 668          are not necessarily directories!  */
 669       if (!file_exists_p (dir))
 670         ret = mkdir (dir, 0777);
 671       else
 672         ret = 0;
 673       if (quit)
 674         break;
 675       else
 676         dir[i] = '/';
 677     }
 678   return ret;
 679 }
 680
 681 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 682    should be a file name.
 683
 684    file_merge("/foo/bar", "baz")  => "/foo/baz"
 685    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 686    file_merge("foo", "bar")       => "bar"
 687
 688    In other words, it's a simpler and gentler version of uri_merge_1.  */
 689
 690 char *
 691 file_merge (const char *base, const char *file)
 692 {
 693   char *result;
 694   const char *cut = (const char *)strrchr (base, '/');
 695
 696   if (!cut)
 697     return xstrdup (file);
 698
 699   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 700   memcpy (result, base, cut - base);
 701   result[cut - base] = '/';
 702   strcpy (result + (cut - base) + 1, file);
 703
 704   return result;
 705 }
 706 \f
 707 static int in_acclist PARAMS ((const char *const *, const char *, int));
 708
 709 /* Determine whether a file is acceptable to be followed, according to
 710    lists of patterns to accept/reject.  */
 711 int
 712 acceptable (const char *s)
 713 {
 714   int l = strlen (s);
 715
 716   while (l && s[l] != '/')
 717     --l;
 718   if (s[l] == '/')
 719     s += (l + 1);
 720   if (opt.accepts)
 721     {
 722       if (opt.rejects)
 723         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 724                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 725       else
 726         return in_acclist ((const char *const *)opt.accepts, s, 1);
 727     }
 728   else if (opt.rejects)
 729     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 730   return 1;
 731 }
 732
 733 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 734    `/something', frontcmp() will return 1 only if S2 begins with
 735    `/something'.  Otherwise, 0 is returned.  */
 736 int
 737 frontcmp (const char *s1, const char *s2)
 738 {
 739   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 740   return !*s1;
 741 }
 742
 743 /* Iterate through STRLIST, and return the first element that matches
 744    S, through wildcards or front comparison (as appropriate).  */
 745 static char *
 746 proclist (char **strlist, const char *s, enum accd flags)
 747 {
 748   char **x;
 749
 750   for (x = strlist; *x; x++)
 751     if (has_wildcards_p (*x))
 752       {
 753         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 754           break;
 755       }
 756     else
 757       {
 758         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 759         if (frontcmp (p, s))
 760           break;
 761       }
 762   return *x;
 763 }
 764
 765 /* Returns whether DIRECTORY is acceptable for download, wrt the
 766    include/exclude lists.
 767
 768    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 769    and absolute paths may be freely intermixed.  */
 770 int
 771 accdir (const char *directory, enum accd flags)
 772 {
 773   /* Remove starting '/'.  */
 774   if (flags & ALLABS && *directory == '/')
 775     ++directory;
 776   if (opt.includes)
 777     {
 778       if (!proclist (opt.includes, directory, flags))
 779         return 0;
 780     }
 781   if (opt.excludes)
 782     {
 783       if (proclist (opt.excludes, directory, flags))
 784         return 0;
 785     }
 786   return 1;
 787 }
 788
 789 /* Return non-zero if STRING ends with TAIL.  For instance:
 790
 791    match_tail ("abc", "bc", 0)  -> 1
 792    match_tail ("abc", "ab", 0)  -> 0
 793    match_tail ("abc", "abc", 0) -> 1
 794
 795    If FOLD_CASE_P is non-zero, the comparison will be
 796    case-insensitive.  */
 797
 798 int
 799 match_tail (const char *string, const char *tail, int fold_case_p)
 800 {
 801   int i, j;
 802
 803   /* We want this to be fast, so we code two loops, one with
 804      case-folding, one without. */
 805
 806   if (!fold_case_p)
 807     {
 808       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 809         if (string[i] != tail[j])
 810           break;
 811     }
 812   else
 813     {
 814       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 815         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 816           break;
 817     }
 818
 819   /* If the tail was exhausted, the match was succesful.  */
 820   if (j == -1)
 821     return 1;
 822   else
 823     return 0;
 824 }
 825
 826 /* Checks whether string S matches each element of ACCEPTS.  A list
 827    element are matched either with fnmatch() or match_tail(),
 828    according to whether the element contains wildcards or not.
 829
 830    If the BACKWARD is 0, don't do backward comparison -- just compare
 831    them normally.  */
 832 static int
 833 in_acclist (const char *const *accepts, const char *s, int backward)
 834 {
 835   for (; *accepts; accepts++)
 836     {
 837       if (has_wildcards_p (*accepts))
 838         {
 839           /* fnmatch returns 0 if the pattern *does* match the
 840              string.  */
 841           if (fnmatch (*accepts, s, 0) == 0)
 842             return 1;
 843         }
 844       else
 845         {
 846           if (backward)
 847             {
 848               if (match_tail (s, *accepts, 0))
 849                 return 1;
 850             }
 851           else
 852             {
 853               if (!strcmp (s, *accepts))
 854                 return 1;
 855             }
 856         }
 857     }
 858   return 0;
 859 }
 860
 861 /* Return the location of STR's suffix (file extension).  Examples:
 862    suffix ("foo.bar")       -> "bar"
 863    suffix ("foo.bar.baz")   -> "baz"
 864    suffix ("/foo/bar")      -> NULL
 865    suffix ("/foo.bar/baz")  -> NULL  */
 866 char *
 867 suffix (const char *str)
 868 {
 869   int i;
 870
 871   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 872     ;
 873
 874   if (str[i++] == '.')
 875     return (char *)str + i;
 876   else
 877     return NULL;
 878 }
 879
 880 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
 881    `]').  */
 882
 883 int
 884 has_wildcards_p (const char *s)
 885 {
 886   for (; *s; s++)
 887     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 888       return 1;
 889   return 0;
 890 }
 891
 892 /* Return non-zero if FNAME ends with a typical HTML suffix.  The
 893    following (case-insensitive) suffixes are presumed to be HTML files:
 894
 895      html
 896      htm
 897      ?html (`?' matches one character)
 898
 899    #### CAVEAT.  This is not necessarily a good indication that FNAME
 900    refers to a file that contains HTML!  */
 901 int
 902 has_html_suffix_p (const char *fname)
 903 {
 904   char *suf;
 905
 906   if ((suf = suffix (fname)) == NULL)
 907     return 0;
 908   if (!strcasecmp (suf, "html"))
 909     return 1;
 910   if (!strcasecmp (suf, "htm"))
 911     return 1;
 912   if (suf[0] && !strcasecmp (suf + 1, "html"))
 913     return 1;
 914   return 0;
 915 }
 916
 917 /* Read a line from FP and return the pointer to freshly allocated
 918    storage.  The storage space is obtained through malloc() and should
 919    be freed with free() when it is no longer needed.
 920
 921    The length of the line is not limited, except by available memory.
 922    The newline character at the end of line is retained.  The line is
 923    terminated with a zero character.
 924
 925    After end-of-file is encountered without anything being read, NULL
 926    is returned.  NULL is also returned on error.  To distinguish
 927    between these two cases, use the stdio function ferror().  */
 928
 929 char *
 930 read_whole_line (FILE *fp)
 931 {
 932   int length = 0;
 933   int bufsize = 82;
 934   char *line = (char *)xmalloc (bufsize);
 935
 936   while (fgets (line + length, bufsize - length, fp))
 937     {
 938       length += strlen (line + length);
 939       if (length == 0)
 940         /* Possible for example when reading from a binary file where
 941            a line begins with \0.  */
 942         continue;
 943
 944       if (line[length - 1] == '\n')
 945         break;
 946
 947       /* fgets() guarantees to read the whole line, or to use up the
 948          space we've given it.  We can double the buffer
 949          unconditionally.  */
 950       bufsize <<= 1;
 951       line = xrealloc (line, bufsize);
 952     }
 953   if (length == 0 || ferror (fp))
 954     {
 955       xfree (line);
 956       return NULL;
 957     }
 958   if (length + 1 < bufsize)
 959     /* Relieve the memory from our exponential greediness.  We say
 960        `length + 1' because the terminating \0 is not included in
 961        LENGTH.  We don't need to zero-terminate the string ourselves,
 962        though, because fgets() does that.  */
 963     line = xrealloc (line, length + 1);
 964   return line;
 965 }
 966 \f
 967 /* Read FILE into memory.  A pointer to `struct file_memory' are
 968    returned; use struct element `content' to access file contents, and
 969    the element `length' to know the file length.  `content' is *not*
 970    zero-terminated, and you should *not* read or write beyond the [0,
 971    length) range of characters.
 972
 973    After you are done with the file contents, call read_file_free to
 974    release the memory.
 975
 976    Depending on the operating system and the type of file that is
 977    being read, read_file() either mmap's the file into memory, or
 978    reads the file into the core using read().
 979
 980    If file is named "-", fileno(stdin) is used for reading instead.
 981    If you want to read from a real file named "-", use "./-" instead.  */
 982
 983 struct file_memory *
 984 read_file (const char *file)
 985 {
 986   int fd;
 987   struct file_memory *fm;
 988   long size;
 989   int inhibit_close = 0;
 990
 991   /* Some magic in the finest tradition of Perl and its kin: if FILE
 992      is "-", just use stdin.  */
 993   if (HYPHENP (file))
 994     {
 995       fd = fileno (stdin);
 996       inhibit_close = 1;
 997       /* Note that we don't inhibit mmap() in this case.  If stdin is
 998          redirected from a regular file, mmap() will still work.  */
 999     }
1000   else
1001     fd = open (file, O_RDONLY);
1002   if (fd < 0)
1003     return NULL;
1004   fm = xmalloc (sizeof (struct file_memory));
1005
1006 #ifdef HAVE_MMAP
1007   {
1008     struct stat buf;
1009     if (fstat (fd, &buf) < 0)
1010       goto mmap_lose;
1011     fm->length = buf.st_size;
1012     /* NOTE: As far as I know, the callers of this function never
1013        modify the file text.  Relying on this would enable us to
1014        specify PROT_READ and MAP_SHARED for a marginal gain in
1015        efficiency, but at some cost to generality.  */
1016     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1017                         MAP_PRIVATE, fd, 0);
1018     if (fm->content == (char *)MAP_FAILED)
1019       goto mmap_lose;
1020     if (!inhibit_close)
1021       close (fd);
1022
1023     fm->mmap_p = 1;
1024     return fm;
1025   }
1026
1027  mmap_lose:
1028   /* The most common reason why mmap() fails is that FD does not point
1029      to a plain file.  However, it's also possible that mmap() doesn't
1030      work for a particular type of file.  Therefore, whenever mmap()
1031      fails, we just fall back to the regular method.  */
1032 #endif /* HAVE_MMAP */
1033
1034   fm->length = 0;
1035   size = 512;                   /* number of bytes fm->contents can
1036                                    hold at any given time. */
1037   fm->content = xmalloc (size);
1038   while (1)
1039     {
1040       long nread;
1041       if (fm->length > size / 2)
1042         {
1043           /* #### I'm not sure whether the whole exponential-growth
1044              thing makes sense with kernel read.  On Linux at least,
1045              read() refuses to read more than 4K from a file at a
1046              single chunk anyway.  But other Unixes might optimize it
1047              better, and it doesn't *hurt* anything, so I'm leaving
1048              it.  */
1049
1050           /* Normally, we grow SIZE exponentially to make the number
1051              of calls to read() and realloc() logarithmic in relation
1052              to file size.  However, read() can read an amount of data
1053              smaller than requested, and it would be unreasonable to
1054              double SIZE every time *something* was read.  Therefore,
1055              we double SIZE only when the length exceeds half of the
1056              entire allocated size.  */
1057           size <<= 1;
1058           fm->content = xrealloc (fm->content, size);
1059         }
1060       nread = read (fd, fm->content + fm->length, size - fm->length);
1061       if (nread > 0)
1062         /* Successful read. */
1063         fm->length += nread;
1064       else if (nread < 0)
1065         /* Error. */
1066         goto lose;
1067       else
1068         /* EOF */
1069         break;
1070     }
1071   if (!inhibit_close)
1072     close (fd);
1073   if (size > fm->length && fm->length != 0)
1074     /* Due to exponential growth of fm->content, the allocated region
1075        might be much larger than what is actually needed.  */
1076     fm->content = xrealloc (fm->content, fm->length);
1077   fm->mmap_p = 0;
1078   return fm;
1079
1080  lose:
1081   if (!inhibit_close)
1082     close (fd);
1083   xfree (fm->content);
1084   xfree (fm);
1085   return NULL;
1086 }
1087
1088 /* Release the resources held by FM.  Specifically, this calls
1089    munmap() or xfree() on fm->content, depending whether mmap or
1090    malloc/read were used to read in the file.  It also frees the
1091    memory needed to hold the FM structure itself.  */
1092
1093 void
1094 read_file_free (struct file_memory *fm)
1095 {
1096 #ifdef HAVE_MMAP
1097   if (fm->mmap_p)
1098     {
1099       munmap (fm->content, fm->length);
1100     }
1101   else
1102 #endif
1103     {
1104       xfree (fm->content);
1105     }
1106   xfree (fm);
1107 }
1108 \f
1109 /* Free the pointers in a NULL-terminated vector of pointers, then
1110    free the pointer itself.  */
1111 void
1112 free_vec (char **vec)
1113 {
1114   if (vec)
1115     {
1116       char **p = vec;
1117       while (*p)
1118         xfree (*p++);
1119       xfree (vec);
1120     }
1121 }
1122
1123 /* Append vector V2 to vector V1.  The function frees V2 and
1124    reallocates V1 (thus you may not use the contents of neither
1125    pointer after the call).  If V1 is NULL, V2 is returned.  */
1126 char **
1127 merge_vecs (char **v1, char **v2)
1128 {
1129   int i, j;
1130
1131   if (!v1)
1132     return v2;
1133   if (!v2)
1134     return v1;
1135   if (!*v2)
1136     {
1137       /* To avoid j == 0 */
1138       xfree (v2);
1139       return v1;
1140     }
1141   /* Count v1.  */
1142   for (i = 0; v1[i]; i++);
1143   /* Count v2.  */
1144   for (j = 0; v2[j]; j++);
1145   /* Reallocate v1.  */
1146   v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1147   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1148   xfree (v2);
1149   return v1;
1150 }
1151
1152 /* A set of simple-minded routines to store strings in a linked list.
1153    This used to also be used for searching, but now we have hash
1154    tables for that.  */
1155
1156 /* It's a shame that these simple things like linked lists and hash
1157    tables (see hash.c) need to be implemented over and over again.  It
1158    would be nice to be able to use the routines from glib -- see
1159    www.gtk.org for details.  However, that would make Wget depend on
1160    glib, and I want to avoid dependencies to external libraries for
1161    reasons of convenience and portability (I suspect Wget is more
1162    portable than anything ever written for Gnome).  */
1163
1164 /* Append an element to the list.  If the list has a huge number of
1165    elements, this can get slow because it has to find the list's
1166    ending.  If you think you have to call slist_append in a loop,
1167    think about calling slist_prepend() followed by slist_nreverse().  */
1168
1169 slist *
1170 slist_append (slist *l, const char *s)
1171 {
1172   slist *newel = (slist *)xmalloc (sizeof (slist));
1173   slist *beg = l;
1174
1175   newel->string = xstrdup (s);
1176   newel->next = NULL;
1177
1178   if (!l)
1179     return newel;
1180   /* Find the last element.  */
1181   while (l->next)
1182     l = l->next;
1183   l->next = newel;
1184   return beg;
1185 }
1186
1187 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1188
1189 slist *
1190 slist_prepend (slist *l, const char *s)
1191 {
1192   slist *newel = (slist *)xmalloc (sizeof (slist));
1193   newel->string = xstrdup (s);
1194   newel->next = l;
1195   return newel;
1196 }
1197
1198 /* Destructively reverse L. */
1199
1200 slist *
1201 slist_nreverse (slist *l)
1202 {
1203   slist *prev = NULL;
1204   while (l)
1205     {
1206       slist *next = l->next;
1207       l->next = prev;
1208       prev = l;
1209       l = next;
1210     }
1211   return prev;
1212 }
1213
1214 /* Is there a specific entry in the list?  */
1215 int
1216 slist_contains (slist *l, const char *s)
1217 {
1218   for (; l; l = l->next)
1219     if (!strcmp (l->string, s))
1220       return 1;
1221   return 0;
1222 }
1223
1224 /* Free the whole slist.  */
1225 void
1226 slist_free (slist *l)
1227 {
1228   while (l)
1229     {
1230       slist *n = l->next;
1231       xfree (l->string);
1232       xfree (l);
1233       l = n;
1234     }
1235 }
1236 \f
1237 /* Sometimes it's useful to create "sets" of strings, i.e. special
1238    hash tables where you want to store strings as keys and merely
1239    query for their existence.  Here is a set of utility routines that
1240    makes that transparent.  */
1241
1242 void
1243 string_set_add (struct hash_table *ht, const char *s)
1244 {
1245   /* First check whether the set element already exists.  If it does,
1246      do nothing so that we don't have to free() the old element and
1247      then strdup() a new one.  */
1248   if (hash_table_contains (ht, s))
1249     return;
1250
1251   /* We use "1" as value.  It provides us a useful and clear arbitrary
1252      value, and it consumes no memory -- the pointers to the same
1253      string "1" will be shared by all the key-value pairs in all `set'
1254      hash tables.  */
1255   hash_table_put (ht, xstrdup (s), "1");
1256 }
1257
1258 /* Synonym for hash_table_contains... */
1259
1260 int
1261 string_set_contains (struct hash_table *ht, const char *s)
1262 {
1263   return hash_table_contains (ht, s);
1264 }
1265
1266 static int
1267 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1268 {
1269   xfree (key);
1270   return 0;
1271 }
1272
1273 void
1274 string_set_free (struct hash_table *ht)
1275 {
1276   hash_table_map (ht, string_set_free_mapper, NULL);
1277   hash_table_destroy (ht);
1278 }
1279
1280 static int
1281 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1282 {
1283   xfree (key);
1284   xfree (value);
1285   return 0;
1286 }
1287
1288 /* Another utility function: call free() on all keys and values of HT.  */
1289
1290 void
1291 free_keys_and_values (struct hash_table *ht)
1292 {
1293   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1294 }
1295
1296 \f
1297 /* Engine for legible and legible_large_int; add thousand separators
1298    to numbers printed in strings.  */
1299
1300 static char *
1301 legible_1 (const char *repr)
1302 {
1303   static char outbuf[48];
1304   int i, i1, mod;
1305   char *outptr;
1306   const char *inptr;
1307
1308   /* Reset the pointers.  */
1309   outptr = outbuf;
1310   inptr = repr;
1311
1312   /* Ignore the sign for the purpose of adding thousand
1313      separators.  */
1314   if (*inptr == '-')
1315     {
1316       *outptr++ = '-';
1317       ++inptr;
1318     }
1319   /* How many digits before the first separator?  */
1320   mod = strlen (inptr) % 3;
1321   /* Insert them.  */
1322   for (i = 0; i < mod; i++)
1323     *outptr++ = inptr[i];
1324   /* Now insert the rest of them, putting separator before every
1325      third digit.  */
1326   for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1327     {
1328       if (i % 3 == 0 && i1 != 0)
1329         *outptr++ = ',';
1330       *outptr++ = inptr[i1];
1331     }
1332   /* Zero-terminate the string.  */
1333   *outptr = '\0';
1334   return outbuf;
1335 }
1336
1337 /* Legible -- return a static pointer to the legibly printed long.  */
1338
1339 char *
1340 legible (long l)
1341 {
1342   char inbuf[24];
1343   /* Print the number into the buffer.  */
1344   number_to_string (inbuf, l);
1345   return legible_1 (inbuf);
1346 }
1347
1348 /* Write a string representation of LARGE_INT NUMBER into the provided
1349    buffer.  The buffer should be able to accept 24 characters,
1350    including the terminating zero.
1351
1352    It would be dangerous to use sprintf, because the code wouldn't
1353    work on a machine with gcc-provided long long support, but without
1354    libc support for "%lld".  However, such platforms will typically
1355    not have snprintf and will use our version, which does support
1356    "%lld" where long longs are available.  */
1357
1358 static void
1359 large_int_to_string (char *buffer, LARGE_INT number)
1360 {
1361   snprintf (buffer, 24, LARGE_INT_FMT, number);
1362 }
1363
1364 /* The same as legible(), but works on LARGE_INT.  */
1365
1366 char *
1367 legible_large_int (LARGE_INT l)
1368 {
1369   char inbuf[48];
1370   large_int_to_string (inbuf, l);
1371   return legible_1 (inbuf);
1372 }
1373
1374 /* Count the digits in a (long) integer.  */
1375 int
1376 numdigit (long number)
1377 {
1378   int cnt = 1;
1379   if (number < 0)
1380     {
1381       number = -number;
1382       ++cnt;
1383     }
1384   while ((number /= 10) > 0)
1385     ++cnt;
1386   return cnt;
1387 }
1388
1389 /* A half-assed implementation of INT_MAX on machines that don't
1390    bother to define one. */
1391 #ifndef INT_MAX
1392 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
1393 #endif
1394
1395 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1396 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1397
1398 #define DIGITS_1(figure) ONE_DIGIT (figure)
1399 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1400 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1401 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1402 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1403 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1404 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1405 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1406 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1407 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1408
1409 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1410
1411 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1412 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1413 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1414 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1415 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1416 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1417 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1418 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1419 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1420
1421 /* Print NUMBER to BUFFER in base 10.  This should be completely
1422    equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1423
1424    The speedup may make a difference in programs that frequently
1425    convert numbers to strings.  Some implementations of sprintf,
1426    particularly the one in GNU libc, have been known to be extremely
1427    slow compared to this function.
1428
1429    Return the pointer to the location where the terminating zero was
1430    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1431    function is done.)
1432
1433    BUFFER should be big enough to accept as many bytes as you expect
1434    the number to take up.  On machines with 64-bit longs the maximum
1435    needed size is 24 bytes.  That includes the digits needed for the
1436    largest 64-bit number, the `-' sign in case it's negative, and the
1437    terminating '\0'.  */
1438
1439 char *
1440 number_to_string (char *buffer, long number)
1441 {
1442   char *p = buffer;
1443   long n = number;
1444
1445 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1446   /* We are running in a strange or misconfigured environment.  Let
1447      sprintf cope with it.  */
1448   sprintf (buffer, "%ld", n);
1449   p += strlen (buffer);
1450 #else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1451
1452   if (n < 0)
1453     {
1454       if (n < -INT_MAX)
1455         {
1456           /* We cannot print a '-' and assign -n to n because -n would
1457              overflow.  Let sprintf deal with this border case.  */
1458           sprintf (buffer, "%ld", n);
1459           p += strlen (buffer);
1460           return p;
1461         }
1462
1463       *p++ = '-';
1464       n = -n;
1465     }
1466
1467   if      (n < 10)                   { DIGITS_1 (1); }
1468   else if (n < 100)                  { DIGITS_2 (10); }
1469   else if (n < 1000)                 { DIGITS_3 (100); }
1470   else if (n < 10000)                { DIGITS_4 (1000); }
1471   else if (n < 100000)               { DIGITS_5 (10000); }
1472   else if (n < 1000000)              { DIGITS_6 (100000); }
1473   else if (n < 10000000)             { DIGITS_7 (1000000); }
1474   else if (n < 100000000)            { DIGITS_8 (10000000); }
1475   else if (n < 1000000000)           { DIGITS_9 (100000000); }
1476 #if SIZEOF_LONG == 4
1477   /* ``if (1)'' serves only to preserve editor indentation. */
1478   else if (1)                        { DIGITS_10 (1000000000); }
1479 #else  /* SIZEOF_LONG != 4 */
1480   else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
1481   else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
1482   else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
1483   else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
1484   else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
1485   else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
1486   else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
1487   else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
1488   else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1489   else                               { DIGITS_19 (1000000000000000000L); }
1490 #endif /* SIZEOF_LONG != 4 */
1491
1492   *p = '\0';
1493 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1494
1495   return p;
1496 }
1497
1498 #undef ONE_DIGIT
1499 #undef ONE_DIGIT_ADVANCE
1500
1501 #undef DIGITS_1
1502 #undef DIGITS_2
1503 #undef DIGITS_3
1504 #undef DIGITS_4
1505 #undef DIGITS_5
1506 #undef DIGITS_6
1507 #undef DIGITS_7
1508 #undef DIGITS_8
1509 #undef DIGITS_9
1510 #undef DIGITS_10
1511 #undef DIGITS_11
1512 #undef DIGITS_12
1513 #undef DIGITS_13
1514 #undef DIGITS_14
1515 #undef DIGITS_15
1516 #undef DIGITS_16
1517 #undef DIGITS_17
1518 #undef DIGITS_18
1519 #undef DIGITS_19
1520 \f
1521 /* Support for timers. */
1522
1523 #undef TIMER_WINDOWS
1524 #undef TIMER_GETTIMEOFDAY
1525 #undef TIMER_TIME
1526
1527 /* Depending on the OS and availability of gettimeofday(), one and
1528    only one of the above constants will be defined.  Virtually all
1529    modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1530    use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
1531    non-Windows systems without gettimeofday.
1532
1533    #### Perhaps we should also support ftime(), which exists on old
1534    BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
1535    C, if memory serves me.)  */
1536
1537 #ifdef WINDOWS
1538 # define TIMER_WINDOWS
1539 #else  /* not WINDOWS */
1540 # ifdef HAVE_GETTIMEOFDAY
1541 #  define TIMER_GETTIMEOFDAY
1542 # else
1543 #  define TIMER_TIME
1544 # endif
1545 #endif /* not WINDOWS */
1546
1547 #ifdef TIMER_GETTIMEOFDAY
1548 typedef struct timeval wget_sys_time;
1549 #endif
1550
1551 #ifdef TIMER_TIME
1552 typedef time_t wget_sys_time;
1553 #endif
1554
1555 #ifdef TIMER_WINDOWS
1556 typedef ULARGE_INTEGER wget_sys_time;
1557 #endif
1558
1559 struct wget_timer {
1560   /* The starting point in time which, subtracted from the current
1561      time, yields elapsed time. */
1562   wget_sys_time start;
1563
1564   /* The most recent elapsed time, calculated by wtimer_elapsed().
1565      Measured in milliseconds.  */
1566   double elapsed_last;
1567
1568   /* Approximately, the time elapsed between the true start of the
1569      measurement and the time represented by START.  */
1570   double elapsed_pre_start;
1571 };
1572
1573 /* Allocate a timer.  It is not legal to do anything with a freshly
1574    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1575
1576 struct wget_timer *
1577 wtimer_allocate (void)
1578 {
1579   struct wget_timer *wt =
1580     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1581   return wt;
1582 }
1583
1584 /* Allocate a new timer and reset it.  Return the new timer. */
1585
1586 struct wget_timer *
1587 wtimer_new (void)
1588 {
1589   struct wget_timer *wt = wtimer_allocate ();
1590   wtimer_reset (wt);
1591   return wt;
1592 }
1593
1594 /* Free the resources associated with the timer.  Its further use is
1595    prohibited.  */
1596
1597 void
1598 wtimer_delete (struct wget_timer *wt)
1599 {
1600   xfree (wt);
1601 }
1602
1603 /* Store system time to WST.  */
1604
1605 static void
1606 wtimer_sys_set (wget_sys_time *wst)
1607 {
1608 #ifdef TIMER_GETTIMEOFDAY
1609   gettimeofday (wst, NULL);
1610 #endif
1611
1612 #ifdef TIMER_TIME
1613   time (wst);
1614 #endif
1615
1616 #ifdef TIMER_WINDOWS
1617   /* We use GetSystemTime to get the elapsed time.  MSDN warns that
1618      system clock adjustments can skew the output of GetSystemTime
1619      when used as a timer and gives preference to GetTickCount and
1620      high-resolution timers.  But GetTickCount can overflow, and hires
1621      timers are typically used for profiling, not for regular time
1622      measurement.  Since we handle clock skew anyway, we just use
1623      GetSystemTime.  */
1624   FILETIME ft;
1625   SYSTEMTIME st;
1626   GetSystemTime (&st);
1627
1628   /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1629      FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1630      arithmetic on that.  */
1631   SystemTimeToFileTime (&st, &ft);
1632   wst->HighPart = ft.dwHighDateTime;
1633   wst->LowPart  = ft.dwLowDateTime;
1634 #endif
1635 }
1636
1637 /* Reset timer WT.  This establishes the starting point from which
1638    wtimer_elapsed() will return the number of elapsed
1639    milliseconds.  It is allowed to reset a previously used timer.  */
1640
1641 void
1642 wtimer_reset (struct wget_timer *wt)
1643 {
1644   /* Set the start time to the current time. */
1645   wtimer_sys_set (&wt->start);
1646   wt->elapsed_last = 0;
1647   wt->elapsed_pre_start = 0;
1648 }
1649
1650 static double
1651 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1652 {
1653 #ifdef TIMER_GETTIMEOFDAY
1654   return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1655           + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1656 #endif
1657
1658 #ifdef TIMER_TIME
1659   return 1000 * (*wst1 - *wst2);
1660 #endif
1661
1662 #ifdef WINDOWS
1663   /* VC++ 6 doesn't support direct cast of uint64 to double.  To work
1664      around this, we subtract, then convert to signed, then finally to
1665      double.  */
1666   return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1667 #endif
1668 }
1669
1670 /* Return the number of milliseconds elapsed since the timer was last
1671    reset.  It is allowed to call this function more than once to get
1672    increasingly higher elapsed values.  These timers handle clock
1673    skew.  */
1674
1675 double
1676 wtimer_elapsed (struct wget_timer *wt)
1677 {
1678   wget_sys_time now;
1679   double elapsed;
1680
1681   wtimer_sys_set (&now);
1682   elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1683
1684   /* Ideally we'd just return the difference between NOW and
1685      wt->start.  However, the system timer can be set back, and we
1686      could return a value smaller than when we were last called, even
1687      a negative value.  Both of these would confuse the callers, which
1688      expect us to return monotonically nondecreasing values.
1689
1690      Therefore: if ELAPSED is smaller than its previous known value,
1691      we reset wt->start to the current time and effectively start
1692      measuring from this point.  But since we don't want the elapsed
1693      value to start from zero, we set elapsed_pre_start to the last
1694      elapsed time and increment all future calculations by that
1695      amount.  */
1696
1697   if (elapsed < wt->elapsed_last)
1698     {
1699       wt->start = now;
1700       wt->elapsed_pre_start = wt->elapsed_last;
1701       elapsed = wt->elapsed_last;
1702     }
1703
1704   wt->elapsed_last = elapsed;
1705   return elapsed;
1706 }
1707
1708 /* Return the assessed granularity of the timer implementation, in
1709    milliseconds.  This is used by code that tries to substitute a
1710    better value for timers that have returned zero.  */
1711
1712 double
1713 wtimer_granularity (void)
1714 {
1715 #ifdef TIMER_GETTIMEOFDAY
1716   /* Granularity of gettimeofday varies wildly between architectures.
1717      However, it appears that on modern machines it tends to be better
1718      than 1ms.  Assume 100 usecs.  (Perhaps the configure process
1719      could actually measure this?)  */
1720   return 0.1;
1721 #endif
1722
1723 #ifdef TIMER_TIME
1724   return 1000;
1725 #endif
1726
1727 #ifdef TIMER_WINDOWS
1728   /* According to MSDN, GetSystemTime returns a broken-down time
1729      structure the smallest member of which are milliseconds.  */
1730   return 1;
1731 #endif
1732 }
1733 \f
1734 /* This should probably be at a better place, but it doesn't really
1735    fit into html-parse.c.  */
1736
1737 /* The function returns the pointer to the malloc-ed quoted version of
1738    string s.  It will recognize and quote numeric and special graphic
1739    entities, as per RFC1866:
1740
1741    `&' -> `&amp;'
1742    `<' -> `&lt;'
1743    `>' -> `&gt;'
1744    `"' -> `&quot;'
1745    SP  -> `&#32;'
1746
1747    No other entities are recognized or replaced.  */
1748 char *
1749 html_quote_string (const char *s)
1750 {
1751   const char *b = s;
1752   char *p, *res;
1753   int i;
1754
1755   /* Pass through the string, and count the new size.  */
1756   for (i = 0; *s; s++, i++)
1757     {
1758       if (*s == '&')
1759         i += 4;                 /* `amp;' */
1760       else if (*s == '<' || *s == '>')
1761         i += 3;                 /* `lt;' and `gt;' */
1762       else if (*s == '\"')
1763         i += 5;                 /* `quot;' */
1764       else if (*s == ' ')
1765         i += 4;                 /* #32; */
1766     }
1767   res = (char *)xmalloc (i + 1);
1768   s = b;
1769   for (p = res; *s; s++)
1770     {
1771       switch (*s)
1772         {
1773         case '&':
1774           *p++ = '&';
1775           *p++ = 'a';
1776           *p++ = 'm';
1777           *p++ = 'p';
1778           *p++ = ';';
1779           break;
1780         case '<': case '>':
1781           *p++ = '&';
1782           *p++ = (*s == '<' ? 'l' : 'g');
1783           *p++ = 't';
1784           *p++ = ';';
1785           break;
1786         case '\"':
1787           *p++ = '&';
1788           *p++ = 'q';
1789           *p++ = 'u';
1790           *p++ = 'o';
1791           *p++ = 't';
1792           *p++ = ';';
1793           break;
1794         case ' ':
1795           *p++ = '&';
1796           *p++ = '#';
1797           *p++ = '3';
1798           *p++ = '2';
1799           *p++ = ';';
1800           break;
1801         default:
1802           *p++ = *s;
1803         }
1804     }
1805   *p = '\0';
1806   return res;
1807 }
1808
1809 /* Determine the width of the terminal we're running on.  If that's
1810    not possible, return 0.  */
1811
1812 int
1813 determine_screen_width (void)
1814 {
1815   /* If there's a way to get the terminal size using POSIX
1816      tcgetattr(), somebody please tell me.  */
1817 #ifndef TIOCGWINSZ
1818   return 0;
1819 #else  /* TIOCGWINSZ */
1820   int fd;
1821   struct winsize wsz;
1822
1823   if (opt.lfilename != NULL)
1824     return 0;
1825
1826   fd = fileno (stderr);
1827   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1828     return 0;                   /* most likely ENOTTY */
1829
1830   return wsz.ws_col;
1831 #endif /* TIOCGWINSZ */
1832 }
1833
1834 /* Return a random number between 0 and MAX-1, inclusive.
1835
1836    If MAX is greater than the value of RAND_MAX+1 on the system, the
1837    returned value will be in the range [0, RAND_MAX].  This may be
1838    fixed in a future release.
1839
1840    The random number generator is seeded automatically the first time
1841    it is called.
1842
1843    This uses rand() for portability.  It has been suggested that
1844    random() offers better randomness, but this is not required for
1845    Wget, so I chose to go for simplicity and use rand
1846    unconditionally.
1847
1848    DO NOT use this for cryptographic purposes.  It is only meant to be
1849    used in situations where quality of the random numbers returned
1850    doesn't really matter.  */
1851
1852 int
1853 random_number (int max)
1854 {
1855   static int seeded;
1856   double bounded;
1857   int rnd;
1858
1859   if (!seeded)
1860     {
1861       srand (time (NULL));
1862       seeded = 1;
1863     }
1864   rnd = rand ();
1865
1866   /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1867      and enforce that assumption by masking other bits.  */
1868 #ifndef RAND_MAX
1869 # define RAND_MAX 32767
1870   rnd &= RAND_MAX;
1871 #endif
1872
1873   /* This is equivalent to rand() % max, but uses the high-order bits
1874      for better randomness on architecture where rand() is implemented
1875      using a simple congruential generator.  */
1876
1877   bounded = (double)max * rnd / (RAND_MAX + 1.0);
1878   return (int)bounded;
1879 }
1880
1881 /* Return a random uniformly distributed floating point number in the
1882    [0, 1) range.  The precision of returned numbers is 9 digits.
1883
1884    Modify this to use erand48() where available!  */
1885
1886 double
1887 random_float (void)
1888 {
1889   /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1890      sure it's greater than 1000.  */
1891   int rnd1 = random_number (1000);
1892   int rnd2 = random_number (1000);
1893   int rnd3 = random_number (1000);
1894   return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1895 }
1896
1897 #if 0
1898 /* A debugging function for checking whether an MD5 library works. */
1899
1900 #include "gen-md5.h"
1901
1902 char *
1903 debug_test_md5 (char *buf)
1904 {
1905   unsigned char raw[16];
1906   static char res[33];
1907   unsigned char *p1;
1908   char *p2;
1909   int cnt;
1910   ALLOCA_MD5_CONTEXT (ctx);
1911
1912   gen_md5_init (ctx);
1913   gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1914   gen_md5_finish (ctx, raw);
1915
1916   p1 = raw;
1917   p2 = res;
1918   cnt = 16;
1919   while (cnt--)
1920     {
1921       *p2++ = XNUM_TO_digit (*p1 >> 4);
1922       *p2++ = XNUM_TO_digit (*p1 & 0xf);
1923       ++p1;
1924     }
1925   *p2 = '\0';
1926
1927   return res;
1928 }
1929 #endif
1930 \f
1931 /* Implementation of run_with_timeout, a generic timeout-forcing
1932    routine for systems with Unix-like signal handling.  */
1933
1934 #ifdef USE_SIGNAL_TIMEOUT
1935 # ifdef HAVE_SIGSETJMP
1936 #  define SETJMP(env) sigsetjmp (env, 1)
1937
1938 static sigjmp_buf run_with_timeout_env;
1939
1940 static RETSIGTYPE
1941 abort_run_with_timeout (int sig)
1942 {
1943   assert (sig == SIGALRM);
1944   siglongjmp (run_with_timeout_env, -1);
1945 }
1946 # else /* not HAVE_SIGSETJMP */
1947 #  define SETJMP(env) setjmp (env)
1948
1949 static jmp_buf run_with_timeout_env;
1950
1951 static RETSIGTYPE
1952 abort_run_with_timeout (int sig)
1953 {
1954   assert (sig == SIGALRM);
1955   /* We don't have siglongjmp to preserve the set of blocked signals;
1956      if we longjumped out of the handler at this point, SIGALRM would
1957      remain blocked.  We must unblock it manually. */
1958   int mask = siggetmask ();
1959   mask &= ~sigmask (SIGALRM);
1960   sigsetmask (mask);
1961
1962   /* Now it's safe to longjump. */
1963   longjmp (run_with_timeout_env, -1);
1964 }
1965 # endif /* not HAVE_SIGSETJMP */
1966
1967 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1968    setitimer where available, alarm otherwise.
1969
1970    TIMEOUT should be non-zero.  If the timeout value is so small that
1971    it would be rounded to zero, it is rounded to the least legal value
1972    instead (1us for setitimer, 1s for alarm).  That ensures that
1973    SIGALRM will be delivered in all cases.  */
1974
1975 static void
1976 alarm_set (double timeout)
1977 {
1978 #ifdef ITIMER_REAL
1979   /* Use the modern itimer interface. */
1980   struct itimerval itv;
1981   memset (&itv, 0, sizeof (itv));
1982   itv.it_value.tv_sec = (long) timeout;
1983   itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout);
1984   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1985     /* Ensure that we wait for at least the minimum interval.
1986        Specifying zero would mean "wait forever".  */
1987     itv.it_value.tv_usec = 1;
1988   setitimer (ITIMER_REAL, &itv, NULL);
1989 #else  /* not ITIMER_REAL */
1990   /* Use the old alarm() interface. */
1991   int secs = (int) timeout;
1992   if (secs == 0)
1993     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1994        because alarm(0) means "never deliver the alarm", i.e. "wait
1995        forever", which is not what someone who specifies a 0.5s
1996        timeout would expect.  */
1997     secs = 1;
1998   alarm (secs);
1999 #endif /* not ITIMER_REAL */
2000 }
2001
2002 /* Cancel the alarm set with alarm_set. */
2003
2004 static void
2005 alarm_cancel (void)
2006 {
2007 #ifdef ITIMER_REAL
2008   struct itimerval disable;
2009   memset (&disable, 0, sizeof (disable));
2010   setitimer (ITIMER_REAL, &disable, NULL);
2011 #else  /* not ITIMER_REAL */
2012   alarm (0);
2013 #endif /* not ITIMER_REAL */
2014 }
2015
2016 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2017    seconds.  Returns non-zero if the function was interrupted with a
2018    timeout, zero otherwise.
2019
2020    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2021    using setitimer() or alarm().  The timeout is enforced by
2022    longjumping out of the SIGALRM handler.  This has several
2023    advantages compared to the traditional approach of relying on
2024    signals causing system calls to exit with EINTR:
2025
2026      * The callback function is *forcibly* interrupted after the
2027        timeout expires, (almost) regardless of what it was doing and
2028        whether it was in a syscall.  For example, a calculation that
2029        takes a long time is interrupted as reliably as an IO
2030        operation.
2031
2032      * It works with both SYSV and BSD signals because it doesn't
2033        depend on the default setting of SA_RESTART.
2034
2035      * It doesn't special handler setup beyond a simple call to
2036        signal().  (It does use sigsetjmp/siglongjmp, but they're
2037        optional.)
2038
2039    The only downside is that, if FUN allocates internal resources that
2040    are normally freed prior to exit from the functions, they will be
2041    lost in case of timeout.  */
2042
2043 int
2044 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2045 {
2046   int saved_errno;
2047
2048   if (timeout == 0)
2049     {
2050       fun (arg);
2051       return 0;
2052     }
2053
2054   signal (SIGALRM, abort_run_with_timeout);
2055   if (SETJMP (run_with_timeout_env) != 0)
2056     {
2057       /* Longjumped out of FUN with a timeout. */
2058       signal (SIGALRM, SIG_DFL);
2059       return 1;
2060     }
2061   alarm_set (timeout);
2062   fun (arg);
2063
2064   /* Preserve errno in case alarm() or signal() modifies it. */
2065   saved_errno = errno;
2066   alarm_cancel ();
2067   signal (SIGALRM, SIG_DFL);
2068   errno = saved_errno;
2069
2070   return 0;
2071 }
2072
2073 #else  /* not USE_SIGNAL_TIMEOUT */
2074
2075 #ifndef WINDOWS
2076 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
2077    define it under Windows, because Windows has its own version of
2078    run_with_timeout that uses threads.  */
2079
2080 int
2081 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2082 {
2083   fun (arg);
2084   return 0;
2085 }
2086 #endif /* not WINDOWS */
2087 #endif /* not USE_SIGNAL_TIMEOUT */