sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53
  54 /* For TIOCGWINSZ and friends: */
  55 #ifdef HAVE_SYS_IOCTL_H
  56 # include <sys/ioctl.h>
  57 #endif
  58 #ifdef HAVE_TERMIOS_H
  59 # include <termios.h>
  60 #endif
  61
  62 #include "wget.h"
  63 #include "utils.h"
  64 #include "fnmatch.h"
  65 #include "hash.h"
  66
  67 #ifndef errno
  68 extern int errno;
  69 #endif
  70
  71 /* This section implements several wrappers around the basic
  72    allocation routines.  This is done for two reasons: first, so that
  73    the callers of these functions need not consistently check for
  74    errors.  If there is not enough virtual memory for running Wget,
  75    something is seriously wrong, and Wget exits with an appropriate
  76    error message.
  77
  78    The second reason why these are useful is that, if DEBUG_MALLOC is
  79    defined, they also provide a handy (if crude) malloc debugging
  80    interface that checks memory leaks.  */
  81
  82 /* Croak the fatal memory error and bail out with non-zero exit
  83    status.  */
  84 static void
  85 memfatal (const char *what)
  86 {
  87   /* Make sure we don't try to store part of the log line, and thus
  88      call malloc.  */
  89   log_set_save_context (0);
  90   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  91   exit (1);
  92 }
  93
  94 /* These functions end with _real because they need to be
  95    distinguished from the debugging functions, and from the macros.
  96    Explanation follows:
  97
  98    If memory debugging is not turned on, wget.h defines these:
  99
 100      #define xmalloc xmalloc_real
 101      #define xrealloc xrealloc_real
 102      #define xstrdup xstrdup_real
 103      #define xfree free
 104
 105    In case of memory debugging, the definitions are a bit more
 106    complex, because we want to provide more information, *and* we want
 107    to call the debugging code.  (The former is the reason why xmalloc
 108    and friends need to be macros in the first place.)  Then it looks
 109    like this:
 110
 111      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 112      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 113      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 114      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 115
   Each of the *_debug functions does its magic and calls the real one.  */
 117
 118 #ifdef DEBUG_MALLOC
 119 # define STATIC_IF_DEBUG static
 120 #else
 121 # define STATIC_IF_DEBUG
 122 #endif
 123
 124 STATIC_IF_DEBUG void *
 125 xmalloc_real (size_t size)
 126 {
 127   void *ptr = malloc (size);
 128   if (!ptr)
 129     memfatal ("malloc");
 130   return ptr;
 131 }
 132
 133 STATIC_IF_DEBUG void *
 134 xrealloc_real (void *ptr, size_t newsize)
 135 {
 136   void *newptr;
 137
 138   /* Not all Un*xes have the feature of realloc() that calling it with
 139      a NULL-pointer is the same as malloc(), but it is easy to
 140      simulate.  */
 141   if (ptr)
 142     newptr = realloc (ptr, newsize);
 143   else
 144     newptr = malloc (newsize);
 145   if (!newptr)
 146     memfatal ("realloc");
 147   return newptr;
 148 }
 149
 150 STATIC_IF_DEBUG char *
 151 xstrdup_real (const char *s)
 152 {
 153   char *copy;
 154
 155 #ifndef HAVE_STRDUP
 156   int l = strlen (s);
 157   copy = malloc (l + 1);
 158   if (!copy)
 159     memfatal ("strdup");
 160   memcpy (copy, s, l + 1);
 161 #else  /* HAVE_STRDUP */
 162   copy = strdup (s);
 163   if (!copy)
 164     memfatal ("strdup");
 165 #endif /* HAVE_STRDUP */
 166
 167   return copy;
 168 }
 169
 170 #ifdef DEBUG_MALLOC
 171
 172 /* Crude home-grown routines for debugging some malloc-related
 173    problems.  Featured:
 174
   * Counting the number of malloc and free invocations, and reporting
     the "balance", i.e. how many more times malloc was called than
     free.
 178
 179    * Making malloc store its entry into a simple array and free remove
 180      stuff from that array.  At the end, print the pointers which have
 181      not been freed, along with the source file and the line number.
 182      This also has the side-effect of detecting freeing memory that
 183      was never allocated.
 184
 185    Note that this kind of memory leak checking strongly depends on
 186    every malloc() being followed by a free(), even if the program is
 187    about to finish.  Wget is careful to free the data structure it
 188    allocated in init.c.  */
 189
 190 static int malloc_count, free_count;
 191
 192 static struct {
 193   char *ptr;
 194   const char *file;
 195   int line;
 196 } malloc_debug[100000];
 197
 198 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 199    which can be a real problem.  It would be nice to use a hash table
 200    for malloc_debug, but the functions in hash.c are not suitable
 201    because they can call malloc() themselves.  Maybe it would work if
 202    the hash table were preallocated to a huge size, and if we set the
 203    rehash threshold to 1.0.  */
 204
 205 /* Register PTR in malloc_debug.  Abort if this is not possible
 206    (presumably due to the number of current allocations exceeding the
 207    size of malloc_debug.)  */
 208
 209 static void
 210 register_ptr (void *ptr, const char *file, int line)
 211 {
 212   int i;
 213   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 214     if (malloc_debug[i].ptr == NULL)
 215       {
 216         malloc_debug[i].ptr = ptr;
 217         malloc_debug[i].file = file;
 218         malloc_debug[i].line = line;
 219         return;
 220       }
 221   abort ();
 222 }
 223
 224 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 225    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 226
 227 static void
 228 unregister_ptr (void *ptr)
 229 {
 230   int i;
 231   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 232     if (malloc_debug[i].ptr == ptr)
 233       {
 234         malloc_debug[i].ptr = NULL;
 235         return;
 236       }
 237   abort ();
 238 }
 239
 240 /* Print the malloc debug stats that can be gathered from the above
 241    information.  Currently this is the count of mallocs, frees, the
 242    difference between the two, and the dump of the contents of
 243    malloc_debug.  The last part are the memory leaks.  */
 244
 245 void
 246 print_malloc_debug_stats (void)
 247 {
 248   int i;
 249   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 250           malloc_count, free_count, malloc_count - free_count);
 251   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 252     if (malloc_debug[i].ptr != NULL)
 253       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 254               malloc_debug[i].file, malloc_debug[i].line);
 255 }
 256
 257 void *
 258 xmalloc_debug (size_t size, const char *source_file, int source_line)
 259 {
 260   void *ptr = xmalloc_real (size);
 261   ++malloc_count;
 262   register_ptr (ptr, source_file, source_line);
 263   return ptr;
 264 }
 265
 266 void
 267 xfree_debug (void *ptr, const char *source_file, int source_line)
 268 {
 269   assert (ptr != NULL);
 270   ++free_count;
 271   unregister_ptr (ptr);
 272   free (ptr);
 273 }
 274
 275 void *
 276 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 277 {
 278   void *newptr = xrealloc_real (ptr, newsize);
 279   if (!ptr)
 280     {
 281       ++malloc_count;
 282       register_ptr (newptr, source_file, source_line);
 283     }
 284   else if (newptr != ptr)
 285     {
 286       unregister_ptr (ptr);
 287       register_ptr (newptr, source_file, source_line);
 288     }
 289   return newptr;
 290 }
 291
 292 char *
 293 xstrdup_debug (const char *s, const char *source_file, int source_line)
 294 {
 295   char *copy = xstrdup_real (s);
 296   ++malloc_count;
 297   register_ptr (copy, source_file, source_line);
 298   return copy;
 299 }
 300
 301 #endif /* DEBUG_MALLOC */
 302 \f
 303 /* Utility function: like xstrdup(), but also lowercases S.  */
 304
 305 char *
 306 xstrdup_lower (const char *s)
 307 {
 308   char *copy = xstrdup (s);
 309   char *p = copy;
 310   for (; *p; p++)
 311     *p = TOLOWER (*p);
 312   return copy;
 313 }
 314
 315 /* Return a count of how many times CHR occurs in STRING. */
 316
 317 int
 318 count_char (const char *string, char chr)
 319 {
 320   const char *p;
 321   int count = 0;
 322   for (p = string; *p; p++)
 323     if (*p == chr)
 324       ++count;
 325   return count;
 326 }
 327
 328 /* Copy the string formed by two pointers (one on the beginning, other
 329    on the char after the last char) to a new, malloc-ed location.
 330    0-terminate it.  */
 331 char *
 332 strdupdelim (const char *beg, const char *end)
 333 {
 334   char *res = (char *)xmalloc (end - beg + 1);
 335   memcpy (res, beg, end - beg);
 336   res[end - beg] = '\0';
 337   return res;
 338 }
 339
 340 /* Parse a string containing comma-separated elements, and return a
 341    vector of char pointers with the elements.  Spaces following the
 342    commas are ignored.  */
 343 char **
 344 sepstring (const char *s)
 345 {
 346   char **res;
 347   const char *p;
 348   int i = 0;
 349
 350   if (!s || !*s)
 351     return NULL;
 352   res = NULL;
 353   p = s;
 354   while (*s)
 355     {
 356       if (*s == ',')
 357         {
 358           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 359           res[i] = strdupdelim (p, s);
 360           res[++i] = NULL;
 361           ++s;
 362           /* Skip the blanks following the ','.  */
 363           while (ISSPACE (*s))
 364             ++s;
 365           p = s;
 366         }
 367       else
 368         ++s;
 369     }
 370   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 371   res[i] = strdupdelim (p, s);
 372   res[i + 1] = NULL;
 373   return res;
 374 }
 375 \f
 376 /* Return pointer to a static char[] buffer in which zero-terminated
 377    string-representation of TM (in form hh:mm:ss) is printed.
 378
 379    If TM is non-NULL, the current time-in-seconds will be stored
 380    there.
 381
 382    (#### This is misleading: one would expect TM would be used instead
 383    of the current time in that case.  This design was probably
 384    influenced by the design time(2), and should be changed at some
 385    points.  No callers use non-NULL TM anyway.)  */
 386
 387 char *
 388 time_str (time_t *tm)
 389 {
 390   static char output[15];
 391   struct tm *ptm;
 392   time_t secs = time (tm);
 393
 394   if (secs == -1)
 395     {
 396       /* In case of error, return the empty string.  Maybe we should
 397          just abort if this happens?  */
 398       *output = '\0';
 399       return output;
 400     }
 401   ptm = localtime (&secs);
 402   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 403   return output;
 404 }
 405
 406 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 407
 408 char *
 409 datetime_str (time_t *tm)
 410 {
 411   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 412   struct tm *ptm;
 413   time_t secs = time (tm);
 414
 415   if (secs == -1)
 416     {
 417       /* In case of error, return the empty string.  Maybe we should
 418          just abort if this happens?  */
 419       *output = '\0';
 420       return output;
 421     }
 422   ptm = localtime (&secs);
 423   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 424            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 425            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 426   return output;
 427 }
 428 \f
 429 /* The Windows versions of the following two functions are defined in
 430    mswindows.c.  */
 431
 432 #ifndef WINDOWS
 433 void
 434 fork_to_background (void)
 435 {
 436   pid_t pid;
 437   /* Whether we arrange our own version of opt.lfilename here.  */
 438   int changedp = 0;
 439
 440   if (!opt.lfilename)
 441     {
 442       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 443       changedp = 1;
 444     }
 445   pid = fork ();
 446   if (pid < 0)
 447     {
 448       /* parent, error */
 449       perror ("fork");
 450       exit (1);
 451     }
 452   else if (pid != 0)
 453     {
 454       /* parent, no error */
 455       printf (_("Continuing in background, pid %d.\n"), (int)pid);
 456       if (changedp)
 457         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 458       exit (0);                 /* #### should we use _exit()? */
 459     }
 460
 461   /* child: give up the privileges and keep running. */
 462   setsid ();
 463   freopen ("/dev/null", "r", stdin);
 464   freopen ("/dev/null", "w", stdout);
 465   freopen ("/dev/null", "w", stderr);
 466 }
 467 #endif /* not WINDOWS */
 468 \f
 469 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 470    specified with TM.  */
 471 void
 472 touch (const char *file, time_t tm)
 473 {
 474 #ifdef HAVE_STRUCT_UTIMBUF
 475   struct utimbuf times;
 476   times.actime = times.modtime = tm;
 477 #else
 478   time_t times[2];
 479   times[0] = times[1] = tm;
 480 #endif
 481
 482   if (utime (file, &times) == -1)
 483     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 484 }
 485
 486 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 487    nothing under MS-Windows.  */
 488 int
 489 remove_link (const char *file)
 490 {
 491   int err = 0;
 492   struct stat st;
 493
 494   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 495     {
 496       DEBUGP (("Unlinking %s (symlink).\n", file));
 497       err = unlink (file);
 498       if (err != 0)
 499         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 500                    file, strerror (errno));
 501     }
 502   return err;
 503 }
 504
 505 /* Does FILENAME exist?  This is quite a lousy implementation, since
 506    it supplies no error codes -- only a yes-or-no answer.  Thus it
 507    will return that a file does not exist if, e.g., the directory is
 508    unreadable.  I don't mind it too much currently, though.  The
 509    proper way should, of course, be to have a third, error state,
 510    other than true/false, but that would introduce uncalled-for
 511    additional complexity to the callers.  */
 512 int
 513 file_exists_p (const char *filename)
 514 {
 515 #ifdef HAVE_ACCESS
 516   return access (filename, F_OK) >= 0;
 517 #else
 518   struct stat buf;
 519   return stat (filename, &buf) >= 0;
 520 #endif
 521 }
 522
 523 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 524    Returns 0 on error.  */
 525 int
 526 file_non_directory_p (const char *path)
 527 {
 528   struct stat buf;
 529   /* Use lstat() rather than stat() so that symbolic links pointing to
 530      directories can be identified correctly.  */
 531   if (lstat (path, &buf) != 0)
 532     return 0;
 533   return S_ISDIR (buf.st_mode) ? 0 : 1;
 534 }
 535
 536 /* Return a unique filename, given a prefix and count */
 537 static char *
 538 unique_name_1 (const char *fileprefix, int count)
 539 {
 540   char *filename;
 541
 542   if (count)
 543     {
 544       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 545       sprintf (filename, "%s.%d", fileprefix, count);
 546     }
 547   else
 548     filename = xstrdup (fileprefix);
 549
 550   if (!file_exists_p (filename))
 551     return filename;
 552   else
 553     {
 554       xfree (filename);
 555       return NULL;
 556     }
 557 }
 558
 559 /* Return a unique file name, based on PREFIX.  */
 560 char *
 561 unique_name (const char *prefix)
 562 {
 563   char *file = NULL;
 564   int count = 0;
 565
 566   while (!file)
 567     file = unique_name_1 (prefix, count++);
 568   return file;
 569 }
 570 \f
 571 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 572    are missing, create them first.  In case any mkdir() call fails,
 573    return its error status.  Returns 0 on successful completion.
 574
 575    The behaviour of this function should be identical to the behaviour
 576    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 577 int
 578 make_directory (const char *directory)
 579 {
 580   int quit = 0;
 581   int i;
 582   int ret = 0;
 583   char *dir;
 584
 585   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 586      function is unsafe if called with a read-only char *argument.  */
 587   STRDUP_ALLOCA (dir, directory);
 588
 589   /* If the first character of dir is '/', skip it (and thus enable
 590      creation of absolute-pathname directories.  */
 591   for (i = (*dir == '/'); 1; ++i)
 592     {
 593       for (; dir[i] && dir[i] != '/'; i++)
 594         ;
 595       if (!dir[i])
 596         quit = 1;
 597       dir[i] = '\0';
 598       /* Check whether the directory already exists.  Allow creation of
 599          of intermediate directories to fail, as the initial path components
 600          are not necessarily directories!  */
 601       if (!file_exists_p (dir))
 602         ret = mkdir (dir, 0777);
 603       else
 604         ret = 0;
 605       if (quit)
 606         break;
 607       else
 608         dir[i] = '/';
 609     }
 610   return ret;
 611 }
 612
 613 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 614    should be a file name.
 615
 616    file_merge("/foo/bar", "baz")  => "/foo/baz"
 617    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 618    file_merge("foo", "bar")       => "bar"
 619
 620    In other words, it's a simpler and gentler version of uri_merge_1.  */
 621
 622 char *
 623 file_merge (const char *base, const char *file)
 624 {
 625   char *result;
 626   const char *cut = (const char *)strrchr (base, '/');
 627
 628   if (!cut)
 629     return xstrdup (file);
 630
 631   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 632   memcpy (result, base, cut - base);
 633   result[cut - base] = '/';
 634   strcpy (result + (cut - base) + 1, file);
 635
 636   return result;
 637 }
 638 \f
 639 static int in_acclist PARAMS ((const char *const *, const char *, int));
 640
 641 /* Determine whether a file is acceptable to be followed, according to
 642    lists of patterns to accept/reject.  */
 643 int
 644 acceptable (const char *s)
 645 {
 646   int l = strlen (s);
 647
 648   while (l && s[l] != '/')
 649     --l;
 650   if (s[l] == '/')
 651     s += (l + 1);
 652   if (opt.accepts)
 653     {
 654       if (opt.rejects)
 655         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 656                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 657       else
 658         return in_acclist ((const char *const *)opt.accepts, s, 1);
 659     }
 660   else if (opt.rejects)
 661     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 662   return 1;
 663 }
 664
 665 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 666    `/something', frontcmp() will return 1 only if S2 begins with
 667    `/something'.  Otherwise, 0 is returned.  */
 668 int
 669 frontcmp (const char *s1, const char *s2)
 670 {
 671   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 672   return !*s1;
 673 }
 674
 675 /* Iterate through STRLIST, and return the first element that matches
 676    S, through wildcards or front comparison (as appropriate).  */
 677 static char *
 678 proclist (char **strlist, const char *s, enum accd flags)
 679 {
 680   char **x;
 681
 682   for (x = strlist; *x; x++)
 683     if (has_wildcards_p (*x))
 684       {
 685         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 686           break;
 687       }
 688     else
 689       {
 690         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 691         if (frontcmp (p, s))
 692           break;
 693       }
 694   return *x;
 695 }
 696
 697 /* Returns whether DIRECTORY is acceptable for download, wrt the
 698    include/exclude lists.
 699
 700    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 701    and absolute paths may be freely intermixed.  */
 702 int
 703 accdir (const char *directory, enum accd flags)
 704 {
 705   /* Remove starting '/'.  */
 706   if (flags & ALLABS && *directory == '/')
 707     ++directory;
 708   if (opt.includes)
 709     {
 710       if (!proclist (opt.includes, directory, flags))
 711         return 0;
 712     }
 713   if (opt.excludes)
 714     {
 715       if (proclist (opt.excludes, directory, flags))
 716         return 0;
 717     }
 718   return 1;
 719 }
 720
 721 /* Match the end of STRING against PATTERN.  For instance:
 722
 723    match_backwards ("abc", "bc") -> 1
 724    match_backwards ("abc", "ab") -> 0
 725    match_backwards ("abc", "abc") -> 1 */
 726 int
 727 match_tail (const char *string, const char *pattern)
 728 {
 729   int i, j;
 730
 731   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 732     if (string[i] != pattern[j])
 733       break;
 734   /* If the pattern was exhausted, the match was succesful.  */
 735   if (j == -1)
 736     return 1;
 737   else
 738     return 0;
 739 }
 740
 741 /* Checks whether string S matches each element of ACCEPTS.  A list
 742    element are matched either with fnmatch() or match_tail(),
 743    according to whether the element contains wildcards or not.
 744
 745    If the BACKWARD is 0, don't do backward comparison -- just compare
 746    them normally.  */
 747 static int
 748 in_acclist (const char *const *accepts, const char *s, int backward)
 749 {
 750   for (; *accepts; accepts++)
 751     {
 752       if (has_wildcards_p (*accepts))
 753         {
 754           /* fnmatch returns 0 if the pattern *does* match the
 755              string.  */
 756           if (fnmatch (*accepts, s, 0) == 0)
 757             return 1;
 758         }
 759       else
 760         {
 761           if (backward)
 762             {
 763               if (match_tail (s, *accepts))
 764                 return 1;
 765             }
 766           else
 767             {
 768               if (!strcmp (s, *accepts))
 769                 return 1;
 770             }
 771         }
 772     }
 773   return 0;
 774 }
 775
 776 /* Return the location of STR's suffix (file extension).  Examples:
 777    suffix ("foo.bar")       -> "bar"
 778    suffix ("foo.bar.baz")   -> "baz"
 779    suffix ("/foo/bar")      -> NULL
 780    suffix ("/foo.bar/baz")  -> NULL  */
 781 char *
 782 suffix (const char *str)
 783 {
 784   int i;
 785
 786   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 787     ;
 788
 789   if (str[i++] == '.')
 790     return (char *)str + i;
 791   else
 792     return NULL;
 793 }
 794
 795 /* Return non-zero if FNAME ends with a typical HTML suffix.  The
 796    following (case-insensitive) suffixes are presumed to be HTML files:
 797
 798      html
 799      htm
 800      ?html (`?' matches one character)
 801
 802    #### CAVEAT.  This is not necessarily a good indication that FNAME
 803    refers to a file that contains HTML!  */
 804 int
 805 has_html_suffix_p (const char *fname)
 806 {
 807   char *suf;
 808
 809   if ((suf = suffix (fname)) == NULL)
 810     return 0;
 811   if (!strcasecmp (suf, "html"))
 812     return 1;
 813   if (!strcasecmp (suf, "htm"))
 814     return 1;
 815   if (suf[0] && !strcasecmp (suf + 1, "html"))
 816     return 1;
 817   return 0;
 818 }
 819
 820 /* Read a line from FP and return the pointer to freshly allocated
 821    storage.  The stoarage space is obtained through malloc() and
 822    should be freed with free() when it is no longer needed.
 823
 824    The length of the line is not limited, except by available memory.
 825    The newline character at the end of line is retained.  The line is
 826    terminated with a zero character.
 827
 828    After end-of-file is encountered without anything being read, NULL
 829    is returned.  NULL is also returned on error.  To distinguish
 830    between these two cases, use the stdio function ferror().  */
 831
 832 char *
 833 read_whole_line (FILE *fp)
 834 {
 835   int length = 0;
 836   int bufsize = 82;
 837   char *line = (char *)xmalloc (bufsize);
 838
 839   while (fgets (line + length, bufsize - length, fp))
 840     {
 841       length += strlen (line + length);
 842       if (length == 0)
 843         /* Possible for example when reading from a binary file where
 844            a line begins with \0.  */
 845         continue;
 846
 847       if (line[length - 1] == '\n')
 848         break;
 849
 850       /* fgets() guarantees to read the whole line, or to use up the
 851          space we've given it.  We can double the buffer
 852          unconditionally.  */
 853       bufsize <<= 1;
 854       line = xrealloc (line, bufsize);
 855     }
 856   if (length == 0 || ferror (fp))
 857     {
 858       xfree (line);
 859       return NULL;
 860     }
 861   if (length + 1 < bufsize)
 862     /* Relieve the memory from our exponential greediness.  We say
 863        `length + 1' because the terminating \0 is not included in
 864        LENGTH.  We don't need to zero-terminate the string ourselves,
 865        though, because fgets() does that.  */
 866     line = xrealloc (line, length + 1);
 867   return line;
 868 }
 869 \f
 870 /* Read FILE into memory.  A pointer to `struct file_memory' are
 871    returned; use struct element `content' to access file contents, and
 872    the element `length' to know the file length.  `content' is *not*
 873    zero-terminated, and you should *not* read or write beyond the [0,
 874    length) range of characters.
 875
 876    After you are done with the file contents, call read_file_free to
 877    release the memory.
 878
 879    Depending on the operating system and the type of file that is
 880    being read, read_file() either mmap's the file into memory, or
 881    reads the file into the core using read().
 882
 883    If file is named "-", fileno(stdin) is used for reading instead.
 884    If you want to read from a real file named "-", use "./-" instead.  */
 885
 886 struct file_memory *
 887 read_file (const char *file)
 888 {
 889   int fd;
 890   struct file_memory *fm;
 891   long size;
 892   int inhibit_close = 0;
 893
 894   /* Some magic in the finest tradition of Perl and its kin: if FILE
 895      is "-", just use stdin.  */
 896   if (HYPHENP (file))
 897     {
 898       fd = fileno (stdin);
 899       inhibit_close = 1;
 900       /* Note that we don't inhibit mmap() in this case.  If stdin is
 901          redirected from a regular file, mmap() will still work.  */
 902     }
 903   else
 904     fd = open (file, O_RDONLY);
 905   if (fd < 0)
 906     return NULL;
 907   fm = xmalloc (sizeof (struct file_memory));
 908
 909 #ifdef HAVE_MMAP
 910   {
 911     struct stat buf;
 912     if (fstat (fd, &buf) < 0)
 913       goto mmap_lose;
 914     fm->length = buf.st_size;
 915     /* NOTE: As far as I know, the callers of this function never
 916        modify the file text.  Relying on this would enable us to
 917        specify PROT_READ and MAP_SHARED for a marginal gain in
 918        efficiency, but at some cost to generality.  */
 919     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 920                         MAP_PRIVATE, fd, 0);
 921     if (fm->content == (char *)MAP_FAILED)
 922       goto mmap_lose;
 923     if (!inhibit_close)
 924       close (fd);
 925
 926     fm->mmap_p = 1;
 927     return fm;
 928   }
 929
 930  mmap_lose:
 931   /* The most common reason why mmap() fails is that FD does not point
 932      to a plain file.  However, it's also possible that mmap() doesn't
 933      work for a particular type of file.  Therefore, whenever mmap()
 934      fails, we just fall back to the regular method.  */
 935 #endif /* HAVE_MMAP */
 936
 937   fm->length = 0;
 938   size = 512;                   /* number of bytes fm->contents can
 939                                    hold at any given time. */
 940   fm->content = xmalloc (size);
 941   while (1)
 942     {
 943       long nread;
 944       if (fm->length > size / 2)
 945         {
 946           /* #### I'm not sure whether the whole exponential-growth
 947              thing makes sense with kernel read.  On Linux at least,
 948              read() refuses to read more than 4K from a file at a
 949              single chunk anyway.  But other Unixes might optimize it
 950              better, and it doesn't *hurt* anything, so I'm leaving
 951              it.  */
 952
 953           /* Normally, we grow SIZE exponentially to make the number
 954              of calls to read() and realloc() logarithmic in relation
 955              to file size.  However, read() can read an amount of data
 956              smaller than requested, and it would be unreasonably to
 957              double SIZE every time *something* was read.  Therefore,
 958              we double SIZE only when the length exceeds half of the
 959              entire allocated size.  */
 960           size <<= 1;
 961           fm->content = xrealloc (fm->content, size);
 962         }
 963       nread = read (fd, fm->content + fm->length, size - fm->length);
 964       if (nread > 0)
 965         /* Successful read. */
 966         fm->length += nread;
 967       else if (nread < 0)
 968         /* Error. */
 969         goto lose;
 970       else
 971         /* EOF */
 972         break;
 973     }
 974   if (!inhibit_close)
 975     close (fd);
 976   if (size > fm->length && fm->length != 0)
 977     /* Due to exponential growth of fm->content, the allocated region
 978        might be much larger than what is actually needed.  */
 979     fm->content = xrealloc (fm->content, fm->length);
 980   fm->mmap_p = 0;
 981   return fm;
 982
 983  lose:
 984   if (!inhibit_close)
 985     close (fd);
 986   xfree (fm->content);
 987   xfree (fm);
 988   return NULL;
 989 }
 990
 991 /* Release the resources held by FM.  Specifically, this calls
 992    munmap() or xfree() on fm->content, depending whether mmap or
 993    malloc/read were used to read in the file.  It also frees the
 994    memory needed to hold the FM structure itself.  */
 995
 996 void
 997 read_file_free (struct file_memory *fm)
 998 {
 999 #ifdef HAVE_MMAP
1000   if (fm->mmap_p)
1001     {
1002       munmap (fm->content, fm->length);
1003     }
1004   else
1005 #endif
1006     {
1007       xfree (fm->content);
1008     }
1009   xfree (fm);
1010 }
1011 \f
/* Release a NULL-terminated vector of pointers: every element is
   xfree'd first, then the vector itself.  A NULL VEC is accepted and
   ignored.  */
void
free_vec (char **vec)
{
  int idx;

  if (vec == NULL)
    return;
  for (idx = 0; vec[idx] != NULL; idx++)
    xfree (vec[idx]);
  xfree (vec);
}
1025
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.

   V1 is reallocated and the container V2 is freed (its elements are
   moved, not copied), so after the call the caller must use neither
   of the original pointers -- only the returned one.

   If V1 is NULL, V2 is returned unchanged; if V2 is NULL, V1 is
   returned unchanged.  An empty V2 is freed and V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* Nothing to append (j would be 0); just drop the container.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The elements are `char *', so that is the size to use.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy v2's elements including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1054
1055 /* A set of simple-minded routines to store strings in a linked list.
1056    This used to also be used for searching, but now we have hash
1057    tables for that.  */
1058
1059 /* It's a shame that these simple things like linked lists and hash
1060    tables (see hash.c) need to be implemented over and over again.  It
1061    would be nice to be able to use the routines from glib -- see
1062    www.gtk.org for details.  However, that would make Wget depend on
1063    glib, and I want to avoid dependencies to external libraries for
1064    reasons of convenience and portability (I suspect Wget is more
1065    portable than anything ever written for Gnome).  */
1066
1067 /* Append an element to the list.  If the list has a huge number of
1068    elements, this can get slow because it has to find the list's
1069    ending.  If you think you have to call slist_append in a loop,
1070    think about calling slist_prepend() followed by slist_nreverse().  */
1071
1072 slist *
1073 slist_append (slist *l, const char *s)
1074 {
1075   slist *newel = (slist *)xmalloc (sizeof (slist));
1076   slist *beg = l;
1077
1078   newel->string = xstrdup (s);
1079   newel->next = NULL;
1080
1081   if (!l)
1082     return newel;
1083   /* Find the last element.  */
1084   while (l->next)
1085     l = l->next;
1086   l->next = newel;
1087   return beg;
1088 }
1089
1090 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1091
1092 slist *
1093 slist_prepend (slist *l, const char *s)
1094 {
1095   slist *newel = (slist *)xmalloc (sizeof (slist));
1096   newel->string = xstrdup (s);
1097   newel->next = l;
1098   return newel;
1099 }
1100
1101 /* Destructively reverse L. */
1102
1103 slist *
1104 slist_nreverse (slist *l)
1105 {
1106   slist *prev = NULL;
1107   while (l)
1108     {
1109       slist *next = l->next;
1110       l->next = prev;
1111       prev = l;
1112       l = next;
1113     }
1114   return prev;
1115 }
1116
1117 /* Is there a specific entry in the list?  */
1118 int
1119 slist_contains (slist *l, const char *s)
1120 {
1121   for (; l; l = l->next)
1122     if (!strcmp (l->string, s))
1123       return 1;
1124   return 0;
1125 }
1126
1127 /* Free the whole slist.  */
1128 void
1129 slist_free (slist *l)
1130 {
1131   while (l)
1132     {
1133       slist *n = l->next;
1134       xfree (l->string);
1135       xfree (l);
1136       l = n;
1137     }
1138 }
1139 \f
1140 /* Sometimes it's useful to create "sets" of strings, i.e. special
1141    hash tables where you want to store strings as keys and merely
1142    query for their existence.  Here is a set of utility routines that
1143    makes that transparent.  */
1144
/* Add string S to the set HT.  If S is already a member, nothing
   happens.  Otherwise a fresh copy of S is stored as the key.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when the element is not present yet; that way there
     is never an old key to free() and a new one to strdup().

     The value stored is the literal "1": an arbitrary but clear
     marker that costs no memory, since all key-value pairs in all
     `set' hash tables can share pointers to the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1160
/* Membership test for string sets: returns whatever
   hash_table_contains() reports for key S in HT.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1168
/* hash_table_map() callback used by string_set_free(): frees the KEY
   of one entry.  The value and the user argument are not used.
   Always returns 0.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  (void) value_ignored;
  (void) arg_ignored;

  xfree (key);
  return 0;
}
1175
1176 void
1177 string_set_free (struct hash_table *ht)
1178 {
1179   hash_table_map (ht, string_set_free_mapper, NULL);
1180   hash_table_destroy (ht);
1181 }
1182
/* hash_table_map() callback used by free_keys_and_values(): frees
   both the KEY and the VALUE of one entry.  The user argument is not
   used.  Always returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  (void) arg_ignored;

  xfree (key);
  xfree (value);
  return 0;
}
1190
1191 /* Another utility function: call free() on all keys and values of HT.  */
1192
1193 void
1194 free_keys_and_values (struct hash_table *ht)
1195 {
1196   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1197 }
1198
1199 \f
/* Shared worker for legible and legible_very_long.  REPR is the
   decimal text of a number, optionally starting with `-'.  The result
   is the same text with a `,' inserted between every group of three
   digits counted from the right, e.g. "1234567" -> "1,234,567".

   The result lives in a static buffer that is overwritten by the next
   call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *dst = outbuf;
  const char *src = repr;
  size_t remaining;

  /* A leading minus sign is copied through and takes no part in the
     grouping.  */
  if (*src == '-')
    *dst++ = *src++;

  /* REMAINING counts the characters not yet copied.  After copying
     one, a separator is due whenever a whole number of three-digit
     groups is still left.  */
  for (remaining = strlen (src); remaining > 0; remaining--)
    {
      *dst++ = *src++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
        *dst++ = ',';
    }

  *dst = '\0';
  return outbuf;
}
1237
/* Return the legibly printed form of the long L (digits grouped in
   threes by legible_1).  The returned pointer refers to legible_1's
   static buffer.  */
char *
legible (long l)
{
  char digits[24];

  /* Plain decimal text first; legible_1 adds the separators.  */
  number_to_string (digits, l);
  return legible_1 (digits);
}
1247
1248 /* Write a string representation of NUMBER into the provided buffer.
1249    We cannot use sprintf() because we cannot be sure whether the
1250    platform supports printing of what we chose for VERY_LONG_TYPE.
1251
1252    Example: Gcc supports `long long' under many platforms, but on many
1253    of those the native libc knows nothing of it and therefore cannot
1254    print it.
1255
1256    How long BUFFER needs to be depends on the platform and the content
1257    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1258    bytes are sufficient.  Using more might be a good idea.
1259
1260    This function does not go through the hoops that long_to_string
1261    goes to because it doesn't aspire to be fast.  (It's called perhaps
1262    once in a Wget run.)  */
1263
1264 static void
1265 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1266 {
1267   int i = 0;
1268   int j;
1269
1270   /* Print the number backwards... */
1271   do
1272     {
1273       buffer[i++] = '0' + number % 10;
1274       number /= 10;
1275     }
1276   while (number);
1277
1278   /* ...and reverse the order of the digits. */
1279   for (j = 0; j < i / 2; j++)
1280     {
1281       char c = buffer[j];
1282       buffer[j] = buffer[i - 1 - j];
1283       buffer[i - 1 - j] = c;
1284     }
1285   buffer[i] = '\0';
1286 }
1287
1288 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1289 char *
1290 legible_very_long (VERY_LONG_TYPE l)
1291 {
1292   char inbuf[128];
1293   /* Print the number into the buffer.  */
1294   very_long_to_string (inbuf, l);
1295   return legible_1 (inbuf);
1296 }
1297
/* Count the characters needed to print the (long) integer NUMBER in
   base 10: one per digit, plus one for the `-' sign when NUMBER is
   negative.  Zero counts as one digit.  */
int
numdigit (long number)
{
  unsigned long magnitude;
  int cnt = 1;

  if (number < 0)
    {
      /* Negate in unsigned arithmetic.  `-number' on a long overflows
         (undefined behavior) when NUMBER is LONG_MIN.  */
      magnitude = 0UL - (unsigned long) number;
      ++cnt;                    /* room for the `-' sign */
    }
  else
    magnitude = (unsigned long) number;

  while ((magnitude /= 10) > 0)
    ++cnt;
  return cnt;
}
1312
/* Helper macros for number_to_string.  They operate on its locals:
   P, the output cursor, and N, the magnitude still to be printed.

   ONE_DIGIT stores the digit of N at decimal position FIGURE (a power
   of ten); ONE_DIGIT_ADVANCE additionally strips that digit from N.
   DIGITS_<k> (FIGURE) prints exactly k digits, FIGURE being
   10^(k-1).  */

#define ONE_DIGIT(figure) *p++ = (char) (n / (figure) + '0')
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This should be completely
   equivalent to `sprintf(buffer, "%ld", number)', only much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be big enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit longs, 24 bytes is
   always sufficient: that covers the 19 digits of the largest 64-bit
   number, the `-' sign in case it's negative, and the terminating
   '\0'.

   Every value of NUMBER is handled, including LONG_MIN: the magnitude
   is kept in an unsigned long so that negating it cannot overflow.  */

char *
number_to_string (char *buffer, long number)
{
  char *p = buffer;

#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
  p += strlen (buffer);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Negate in unsigned arithmetic; `-number' as a long would
         overflow for LONG_MIN.  */
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Pick the unrolled printer that matches the number of digits.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */

  return p;
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1429 \f
/* Support for timers. */

/* Start from a clean slate, then select the timer implementation.
   Exactly one of TIMER_WINDOWS, TIMER_GETTIMEOFDAY and TIMER_TIME
   ends up defined:

     TIMER_WINDOWS       -- when WINDOWS is defined;
     TIMER_GETTIMEOFDAY  -- otherwise, when gettimeofday() is available
                            (virtually all modern Unix systems);
     TIMER_TIME          -- catch-all for non-Windows systems without
                            gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else
# define TIMER_TIME
#endif

/* The moment a timer was last reset, stored in whatever form the
   selected implementation provides.  */
struct wget_timer {
#if defined (TIMER_WINDOWS)
  ULARGE_INTEGER wintime;
#elif defined (TIMER_GETTIMEOFDAY)
  long secs;
  long usecs;
#elif defined (TIMER_TIME)
  time_t secs;
#endif
};
1470
1471 /* Allocate a timer.  It is not legal to do anything with a freshly
1472    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1473
1474 struct wget_timer *
1475 wtimer_allocate (void)
1476 {
1477   struct wget_timer *wt =
1478     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1479   return wt;
1480 }
1481
/* Convenience constructor: allocate a timer with wtimer_allocate(),
   reset it with wtimer_reset(), and return it ready for use.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *fresh = wtimer_allocate ();

  wtimer_reset (fresh);
  return fresh;
}
1491
/* Release timer WT by handing it to xfree().  WT must not be used
   after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1500
/* Record the current time in WT.  That moment becomes the starting
   point from which wtimer_elapsed() measures elapsed milliseconds.
   Resetting a timer that has already been used is allowed.  */

void
wtimer_reset (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval now;
  gettimeofday (&now, NULL);
  wt->usecs = now.tv_usec;
  wt->secs  = now.tv_sec;
#endif

#ifdef TIMER_TIME
  wt->secs = time (NULL);
#endif

#ifdef TIMER_WINDOWS
  SYSTEMTIME systime;
  FILETIME filetime;
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  /* Store the two 32-bit halves in the 64-bit start time.  */
  wt->wintime.LowPart  = filetime.dwLowDateTime;
  wt->wintime.HighPart = filetime.dwHighDateTime;
#endif
}
1528
/* Return the number of milliseconds elapsed since the timer WT was
   last reset.  It is allowed to call this function more than once to
   get increasingly higher elapsed values.

   The branch compiled in is chosen by the same TIMER_* constant that
   selects the layout of struct wget_timer.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds scaled up to ms, plus the microsecond difference
     scaled down to ms.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() only has one-second resolution.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME counts 100-nanosecond intervals; 10000 of them make one
     millisecond.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1558
/* Return the assessed granularity, in milliseconds, of the timer
   implementation in use.  This matters to code that has to deal with
   "zero" time intervals.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* Granularity of gettimeofday is hugely architecture-dependent.
     However, it appears that on modern machines it is better than
     1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1583 \f
/* This should probably be at a better place, but it doesn't really
   fit into html-parse.c.  */

/* Return the entity text that replaces character C in quoted HTML,
   or NULL when C is copied through unchanged.  */
static const char *
html_quote_entity (char c)
{
  switch (c)
    {
    case '&':
      return "&amp;";
    case '<':
      return "&lt;";
    case '>':
      return "&gt;";
    case '\"':
      return "&quot;";
    case ' ':
      return "&#32;";
    default:
      return NULL;
    }
}

/* Return a freshly malloc-ed copy of the string S in which the
   following characters are replaced by entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   Every other character is copied unchanged.  */
char *
html_quote_string (const char *s)
{
  const char *scan;
  char *res, *out;
  int needed = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (scan = s; *scan; scan++)
    {
      const char *entity = html_quote_entity (*scan);
      needed += entity ? (int) strlen (entity) : 1;
    }

  res = (char *)xmalloc (needed + 1);

  /* Second pass: emit either the entity or the character itself.  */
  out = res;
  for (scan = s; *scan; scan++)
    {
      const char *entity = html_quote_entity (*scan);
      if (entity)
        {
          size_t len = strlen (entity);
          memcpy (out, entity, len);
          out += len;
        }
      else
        *out++ = *scan;
    }
  *out = '\0';
  return res;
}
1658
/* Return the width, in columns, of the terminal attached to stderr.
   Return 0 when the width cannot be determined: TIOCGWINSZ is not
   available, opt.lfilename is set, or the ioctl fails.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* The ioctl is attempted only when opt.lfilename is NULL; its
     failure most likely means ENOTTY.  */
  if (opt.lfilename == NULL
      && ioctl (fileno (stderr), TIOCGWINSZ, &wsz) >= 0)
    return wsz.ws_col;
#endif /* TIOCGWINSZ */

  return 0;
}
1683
/* Return a random number between 0 and MAX-1, inclusive.

   The result is rand() scaled from [0, RAND_MAX] down (or up) to
   [0, MAX).  When MAX is greater than RAND_MAX+1 the results still
   fall below MAX, but only RAND_MAX+1 distinct values can come out.

   The generator is seeded from the current time on the first call.

   rand() is used unconditionally for portability and simplicity.  It
   has been suggested that random() offers better randomness, but
   Wget does not need it.  */

int
random_number (int max)
{
  static int seeded;
  double scaled;
  int sample;

  /* One-time seeding.  */
  if (!seeded)
    {
      seeded = 1;
      srand (time (NULL));
    }

  sample = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  sample &= RAND_MAX;
#endif

  /* Like rand() % max, but the outcome is driven by the high-order
     bits, which are the better ones where rand() is a simple
     congruential generator.  */
  scaled = (double)max * sample / (RAND_MAX + 1.0);
  return (int)scaled;
}
1726
#if 0
/* Debugging aid for checking whether an MD5 library works (currently
   compiled out).  Runs BUF, a NUL-terminated string, through the
   gen_md5_* interface and returns the 16-byte result as 32 hex
   digits in a static buffer.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  static char res[33];
  unsigned char raw[16];
  char *out = res;
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Two hex digits per byte, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      *out++ = XDIGIT_TO_xchar (raw[i] >> 4);
      *out++ = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  *out = '\0';

  return res;
}
#endif