sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   3    2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 #include <config.h>
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #include <time.h>
  37 #ifdef HAVE_SYS_TIME_H
  38 # include <sys/time.h>
  39 #endif
  40 #ifdef HAVE_UNISTD_H
  41 # include <unistd.h>
  42 #endif
  43 #ifdef HAVE_MMAP
  44 # include <sys/mman.h>
  45 #endif
  46 #ifdef HAVE_UTIME_H
  47 # include <utime.h>
  48 #endif
  49 #ifdef HAVE_SYS_UTIME_H
  50 # include <sys/utime.h>
  51 #endif
  52 #include <errno.h>
  53 #include <fcntl.h>
  54 #include <assert.h>
  55 #include <stdarg.h>
  56 #include <locale.h>
  57
  58 /* For TIOCGWINSZ and friends: */
  59 #ifdef HAVE_SYS_IOCTL_H
  60 # include <sys/ioctl.h>
  61 #endif
  62 #ifdef HAVE_TERMIOS_H
  63 # include <termios.h>
  64 #endif
  65
  66 /* Needed for Unix version of run_with_timeout. */
  67 #include <signal.h>
  68 #include <setjmp.h>
  69
  70 #ifndef HAVE_SIGSETJMP
  71 /* If sigsetjmp is a macro, configure won't pick it up. */
  72 # ifdef sigsetjmp
  73 #  define HAVE_SIGSETJMP
  74 # endif
  75 #endif
  76
  77 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  78 # define USE_SIGNAL_TIMEOUT
  79 #endif
  80
  81 #include "wget.h"
  82 #include "utils.h"
  83 #include "hash.h"
  84
  85 #ifdef TESTING
  86 #include "test.h"
  87 #endif
  88
  89 /* Utility function: like xstrdup(), but also lowercases S.  */
  90
  91 char *
  92 xstrdup_lower (const char *s)
  93 {
  94   char *copy = xstrdup (s);
  95   char *p = copy;
  96   for (; *p; p++)
  97     *p = TOLOWER (*p);
  98   return copy;
  99 }
 100
 101 /* Copy the string formed by two pointers (one on the beginning, other
 102    on the char after the last char) to a new, malloc-ed location.
 103    0-terminate it.  */
 104 char *
 105 strdupdelim (const char *beg, const char *end)
 106 {
 107   char *res = xmalloc (end - beg + 1);
 108   memcpy (res, beg, end - beg);
 109   res[end - beg] = '\0';
 110   return res;
 111 }
 112
 113 /* Parse a string containing comma-separated elements, and return a
 114    vector of char pointers with the elements.  Spaces following the
 115    commas are ignored.  */
 116 char **
 117 sepstring (const char *s)
 118 {
 119   char **res;
 120   const char *p;
 121   int i = 0;
 122
 123   if (!s || !*s)
 124     return NULL;
 125   res = NULL;
 126   p = s;
 127   while (*s)
 128     {
 129       if (*s == ',')
 130         {
 131           res = xrealloc (res, (i + 2) * sizeof (char *));
 132           res[i] = strdupdelim (p, s);
 133           res[++i] = NULL;
 134           ++s;
 135           /* Skip the blanks following the ','.  */
 136           while (ISSPACE (*s))
 137             ++s;
 138           p = s;
 139         }
 140       else
 141         ++s;
 142     }
 143   res = xrealloc (res, (i + 2) * sizeof (char *));
 144   res[i] = strdupdelim (p, s);
 145   res[i + 1] = NULL;
 146   return res;
 147 }
 148 \f
 149 /* Like sprintf, but prints into a string of sufficient size freshly
 150    allocated with malloc, which is returned.  If unable to print due
 151    to invalid format, returns NULL.  Inability to allocate needed
 152    memory results in abort, as with xmalloc.  This is in spirit
 153    similar to the GNU/BSD extension asprintf, but somewhat easier to
 154    use.
 155
 156    Internally the function either calls vasprintf or loops around
 157    vsnprintf until the correct size is found.  Since Wget also ships a
 158    fallback implementation of vsnprintf, this should be portable.  */
 159
 160 char *
 161 aprintf (const char *fmt, ...)
 162 {
 163 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 164   /* Use vasprintf. */
 165   int ret;
 166   va_list args;
 167   char *str;
 168   va_start (args, fmt);
 169   ret = vasprintf (&str, fmt, args);
 170   va_end (args);
 171   if (ret < 0 && errno == ENOMEM)
 172     abort ();                   /* for consistency with xmalloc/xrealloc */
 173   else if (ret < 0)
 174     return NULL;
 175   return str;
 176 #else  /* not HAVE_VASPRINTF */
 177
 178   /* vasprintf is unavailable.  snprintf into a small buffer and
 179      resize it as necessary. */
 180   int size = 32;
 181   char *str = xmalloc (size);
 182
 183   /* #### This code will infloop and eventually abort in xrealloc if
 184      passed a FMT that causes snprintf to consistently return -1.  */
 185
 186   while (1)
 187     {
 188       int n;
 189       va_list args;
 190
 191       va_start (args, fmt);
 192       n = vsnprintf (str, size, fmt, args);
 193       va_end (args);
 194
 195       /* If the printing worked, return the string. */
 196       if (n > -1 && n < size)
 197         return str;
 198
 199       /* Else try again with a larger buffer. */
 200       if (n > -1)               /* C99 */
 201         size = n + 1;           /* precisely what is needed */
 202       else
 203         size <<= 1;             /* twice the old size */
 204       str = xrealloc (str, size);
 205     }
 206 #endif /* not HAVE_VASPRINTF */
 207 }
 208
 209 /* Concatenate the NULL-terminated list of string arguments into
 210    freshly allocated space.  */
 211
 212 char *
 213 concat_strings (const char *str0, ...)
 214 {
 215   va_list args;
 216   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 217   char *ret, *p;
 218
 219   const char *next_str;
 220   int total_length = 0;
 221   int argcount;
 222
 223   /* Calculate the length of and allocate the resulting string. */
 224
 225   argcount = 0;
 226   va_start (args, str0);
 227   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 228     {
 229       int len = strlen (next_str);
 230       if (argcount < countof (saved_lengths))
 231         saved_lengths[argcount++] = len;
 232       total_length += len;
 233     }
 234   va_end (args);
 235   p = ret = xmalloc (total_length + 1);
 236
 237   /* Copy the strings into the allocated space. */
 238
 239   argcount = 0;
 240   va_start (args, str0);
 241   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 242     {
 243       int len;
 244       if (argcount < countof (saved_lengths))
 245         len = saved_lengths[argcount++];
 246       else
 247         len = strlen (next_str);
 248       memcpy (p, next_str, len);
 249       p += len;
 250     }
 251   va_end (args);
 252   *p = '\0';
 253
 254   return ret;
 255 }
 256 \f
 257 /* Format the provided time according to the specified format.  The
 258    format is a string with format elements supported by strftime.  */
 259
 260 static char *
 261 fmttime (time_t t, const char *fmt)
 262 {
 263   static char output[32];
 264   struct tm *tm = localtime(&t);
 265   if (!tm)
 266     abort ();
 267   if (!strftime(output, sizeof(output), fmt, tm))
 268     abort ();
 269   return output;
 270 }
 271
 272 /* Return pointer to a static char[] buffer in which zero-terminated
 273    string-representation of TM (in form hh:mm:ss) is printed.
 274
 275    If TM is NULL, the current time will be used.  */
 276
 277 char *
 278 time_str (time_t t)
 279 {
 280   return fmttime(t, "%H:%M:%S");
 281 }
 282
 283 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 284
 285 char *
 286 datetime_str (time_t t)
 287 {
 288   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 289 }
 290 \f
 291 /* The Windows versions of the following two functions are defined in
 292    mswindows.c. On MSDOS this function should never be called. */
 293
 294 #if !defined(WINDOWS) && !defined(MSDOS)
 295 void
 296 fork_to_background (void)
 297 {
 298   pid_t pid;
 299   /* Whether we arrange our own version of opt.lfilename here.  */
 300   bool logfile_changed = false;
 301
 302   if (!opt.lfilename)
 303     {
 304       /* We must create the file immediately to avoid either a race
 305          condition (which arises from using unique_name and failing to
 306          use fopen_excl) or lying to the user about the log file name
 307          (which arises from using unique_name, printing the name, and
 308          using fopen_excl later on.)  */
 309       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 310       if (new_log_fp)
 311         {
 312           logfile_changed = true;
 313           fclose (new_log_fp);
 314         }
 315     }
 316   pid = fork ();
 317   if (pid < 0)
 318     {
 319       /* parent, error */
 320       perror ("fork");
 321       exit (1);
 322     }
 323   else if (pid != 0)
 324     {
 325       /* parent, no error */
 326       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 327       if (logfile_changed)
 328         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 329       exit (0);                 /* #### should we use _exit()? */
 330     }
 331
 332   /* child: give up the privileges and keep running. */
 333   setsid ();
 334   freopen ("/dev/null", "r", stdin);
 335   freopen ("/dev/null", "w", stdout);
 336   freopen ("/dev/null", "w", stderr);
 337 }
 338 #endif /* !WINDOWS && !MSDOS */
 339 \f
 340 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 341    specified with TM.  The atime ("access time") is set to the current
 342    time.  */
 343
 344 void
 345 touch (const char *file, time_t tm)
 346 {
 347 #ifdef HAVE_STRUCT_UTIMBUF
 348   struct utimbuf times;
 349 #else
 350   struct {
 351     time_t actime;
 352     time_t modtime;
 353   } times;
 354 #endif
 355   times.modtime = tm;
 356   times.actime = time (NULL);
 357   if (utime (file, &times) == -1)
 358     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 359 }
 360
 361 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 362    nothing under MS-Windows.  */
 363 int
 364 remove_link (const char *file)
 365 {
 366   int err = 0;
 367   struct_stat st;
 368
 369   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 370     {
 371       DEBUGP (("Unlinking %s (symlink).\n", file));
 372       err = unlink (file);
 373       if (err != 0)
 374         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 375                    file, strerror (errno));
 376     }
 377   return err;
 378 }
 379
 380 /* Does FILENAME exist?  This is quite a lousy implementation, since
 381    it supplies no error codes -- only a yes-or-no answer.  Thus it
 382    will return that a file does not exist if, e.g., the directory is
 383    unreadable.  I don't mind it too much currently, though.  The
 384    proper way should, of course, be to have a third, error state,
 385    other than true/false, but that would introduce uncalled-for
 386    additional complexity to the callers.  */
 387 bool
 388 file_exists_p (const char *filename)
 389 {
 390 #ifdef HAVE_ACCESS
 391   return access (filename, F_OK) >= 0;
 392 #else
 393   struct_stat buf;
 394   return stat (filename, &buf) >= 0;
 395 #endif
 396 }
 397
 398 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 399    Returns 0 on error.  */
 400 bool
 401 file_non_directory_p (const char *path)
 402 {
 403   struct_stat buf;
 404   /* Use lstat() rather than stat() so that symbolic links pointing to
 405      directories can be identified correctly.  */
 406   if (lstat (path, &buf) != 0)
 407     return false;
 408   return S_ISDIR (buf.st_mode) ? false : true;
 409 }
 410
 411 /* Return the size of file named by FILENAME, or -1 if it cannot be
 412    opened or seeked into. */
 413 wgint
 414 file_size (const char *filename)
 415 {
 416 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 417   wgint size;
 418   /* We use fseek rather than stat to determine the file size because
 419      that way we can also verify that the file is readable without
 420      explicitly checking for permissions.  Inspired by the POST patch
 421      by Arnaud Wylie.  */
 422   FILE *fp = fopen (filename, "rb");
 423   if (!fp)
 424     return -1;
 425   fseeko (fp, 0, SEEK_END);
 426   size = ftello (fp);
 427   fclose (fp);
 428   return size;
 429 #else
 430   struct_stat st;
 431   if (stat (filename, &st) < 0)
 432     return -1;
 433   return st.st_size;
 434 #endif
 435 }
 436
 437 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 438    doesn't exist is found.  Return a freshly allocated copy of the
 439    unused file name.  */
 440
 441 static char *
 442 unique_name_1 (const char *prefix)
 443 {
 444   int count = 1;
 445   int plen = strlen (prefix);
 446   char *template = (char *)alloca (plen + 1 + 24);
 447   char *template_tail = template + plen;
 448
 449   memcpy (template, prefix, plen);
 450   *template_tail++ = '.';
 451
 452   do
 453     number_to_string (template_tail, count++);
 454   while (file_exists_p (template));
 455
 456   return xstrdup (template);
 457 }
 458
 459 /* Return a unique file name, based on FILE.
 460
 461    More precisely, if FILE doesn't exist, it is returned unmodified.
 462    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 463    file name that doesn't exist is returned.
 464
 465    The resulting file is not created, only verified that it didn't
 466    exist at the point in time when the function was called.
 467    Therefore, where security matters, don't rely that the file created
 468    by this function exists until you open it with O_EXCL or
 469    equivalent.
 470
 471    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 472    string.  Otherwise, it may return FILE if the file doesn't exist
 473    (and therefore doesn't need changing).  */
 474
 475 char *
 476 unique_name (const char *file, bool allow_passthrough)
 477 {
 478   /* If the FILE itself doesn't exist, return it without
 479      modification. */
 480   if (!file_exists_p (file))
 481     return allow_passthrough ? (char *)file : xstrdup (file);
 482
 483   /* Otherwise, find a numeric suffix that results in unused file name
 484      and return it.  */
 485   return unique_name_1 (file);
 486 }
 487
 488 /* Create a file based on NAME, except without overwriting an existing
 489    file with that name.  Providing O_EXCL is correctly implemented,
 490    this function does not have the race condition associated with
 491    opening the file returned by unique_name.  */
 492
 493 FILE *
 494 unique_create (const char *name, bool binary, char **opened_name)
 495 {
 496   /* unique file name, based on NAME */
 497   char *uname = unique_name (name, false);
 498   FILE *fp;
 499   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 500     {
 501       xfree (uname);
 502       uname = unique_name (name, false);
 503     }
 504   if (opened_name && fp != NULL)
 505     {
 506       if (fp)
 507         *opened_name = uname;
 508       else
 509         {
 510           *opened_name = NULL;
 511           xfree (uname);
 512         }
 513     }
 514   else
 515     xfree (uname);
 516   return fp;
 517 }
 518
 519 /* Open the file for writing, with the addition that the file is
 520    opened "exclusively".  This means that, if the file already exists,
 521    this function will *fail* and errno will be set to EEXIST.  If
 522    BINARY is set, the file will be opened in binary mode, equivalent
 523    to fopen's "wb".
 524
 525    If opening the file fails for any reason, including the file having
 526    previously existed, this function returns NULL and sets errno
 527    appropriately.  */
 528
 529 FILE *
 530 fopen_excl (const char *fname, bool binary)
 531 {
 532   int fd;
 533 #ifdef O_EXCL
 534   int flags = O_WRONLY | O_CREAT | O_EXCL;
 535 # ifdef O_BINARY
 536   if (binary)
 537     flags |= O_BINARY;
 538 # endif
 539   fd = open (fname, flags, 0666);
 540   if (fd < 0)
 541     return NULL;
 542   return fdopen (fd, binary ? "wb" : "w");
 543 #else  /* not O_EXCL */
 544   /* Manually check whether the file exists.  This is prone to race
 545      conditions, but systems without O_EXCL haven't deserved
 546      better.  */
 547   if (file_exists_p (fname))
 548     {
 549       errno = EEXIST;
 550       return NULL;
 551     }
 552   return fopen (fname, binary ? "wb" : "w");
 553 #endif /* not O_EXCL */
 554 }
 555 \f
 556 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 557    are missing, create them first.  In case any mkdir() call fails,
 558    return its error status.  Returns 0 on successful completion.
 559
 560    The behaviour of this function should be identical to the behaviour
 561    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 562 int
 563 make_directory (const char *directory)
 564 {
 565   int i, ret, quit = 0;
 566   char *dir;
 567
 568   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 569      function is unsafe if called with a read-only char *argument.  */
 570   STRDUP_ALLOCA (dir, directory);
 571
 572   /* If the first character of dir is '/', skip it (and thus enable
 573      creation of absolute-pathname directories.  */
 574   for (i = (*dir == '/'); 1; ++i)
 575     {
 576       for (; dir[i] && dir[i] != '/'; i++)
 577         ;
 578       if (!dir[i])
 579         quit = 1;
 580       dir[i] = '\0';
 581       /* Check whether the directory already exists.  Allow creation of
 582          of intermediate directories to fail, as the initial path components
 583          are not necessarily directories!  */
 584       if (!file_exists_p (dir))
 585         ret = mkdir (dir, 0777);
 586       else
 587         ret = 0;
 588       if (quit)
 589         break;
 590       else
 591         dir[i] = '/';
 592     }
 593   return ret;
 594 }
 595
 596 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 597    should be a file name.
 598
 599    file_merge("/foo/bar", "baz")  => "/foo/baz"
 600    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 601    file_merge("foo", "bar")       => "bar"
 602
 603    In other words, it's a simpler and gentler version of uri_merge.  */
 604
 605 char *
 606 file_merge (const char *base, const char *file)
 607 {
 608   char *result;
 609   const char *cut = (const char *)strrchr (base, '/');
 610
 611   if (!cut)
 612     return xstrdup (file);
 613
 614   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 615   memcpy (result, base, cut - base);
 616   result[cut - base] = '/';
 617   strcpy (result + (cut - base) + 1, file);
 618
 619   return result;
 620 }
 621 \f
 622 /* Like fnmatch, but performs a case-insensitive match.  */
 623
 624 int
 625 fnmatch_nocase (const char *pattern, const char *string, int flags)
 626 {
 627 #ifdef FNM_CASEFOLD
 628   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 629      also present on *BSD platforms, and possibly elsewhere.  */
 630   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 631 #else
 632   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 633   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 634   char *strcopy = (char *) alloca (strlen (string) + 1);
 635   char *p;
 636   for (p = patcopy; *pattern; pattern++, p++)
 637     *p = TOLOWER (*pattern);
 638   *p = '\0';
 639   for (p = strcopy; *string; string++, p++)
 640     *p = TOLOWER (*string);
 641   *p = '\0';
 642   return fnmatch (patcopy, strcopy, flags);
 643 #endif
 644 }
 645
 646 static bool in_acclist (const char *const *, const char *, bool);
 647
 648 /* Determine whether a file is acceptable to be followed, according to
 649    lists of patterns to accept/reject.  */
 650 bool
 651 acceptable (const char *s)
 652 {
 653   int l = strlen (s);
 654
 655   while (l && s[l] != '/')
 656     --l;
 657   if (s[l] == '/')
 658     s += (l + 1);
 659   if (opt.accepts)
 660     {
 661       if (opt.rejects)
 662         return (in_acclist ((const char *const *)opt.accepts, s, true)
 663                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 664       else
 665         return in_acclist ((const char *const *)opt.accepts, s, true);
 666     }
 667   else if (opt.rejects)
 668     return !in_acclist ((const char *const *)opt.rejects, s, true);
 669   return true;
 670 }
 671
 672 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 673    will return true if and only if D2 begins with `/something/' or is exactly
 674    '/something'.  */
 675 bool
 676 subdir_p (const char *d1, const char *d2)
 677 {
 678   if (!opt.ignore_case)
 679     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 680       ;
 681   else
 682     for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
 683       ;
 684
 685   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 686 }
 687
 688 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 689    first element that matches DIR, through wildcards or front comparison (as
 690    appropriate).  */
 691 static bool
 692 dir_matches_p (char **dirlist, const char *dir)
 693 {
 694   char **x;
 695   int (*matcher) (const char *, const char *, int)
 696     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 697
 698   for (x = dirlist; *x; x++)
 699     {
 700       /* Remove leading '/' */
 701       char *p = *x + (**x == '/');
 702       if (has_wildcards_p (p))
 703         {
 704           if (matcher (p, dir, FNM_PATHNAME) == 0)
 705             break;
 706         }
 707       else
 708         {
 709           if (subdir_p (p, dir))
 710             break;
 711         }
 712     }
 713
 714   return *x ? true : false;
 715 }
 716
 717 /* Returns whether DIRECTORY is acceptable for download, wrt the
 718    include/exclude lists.
 719
 720    The leading `/' is ignored in paths; relative and absolute paths
 721    may be freely intermixed.  */
 722
 723 bool
 724 accdir (const char *directory)
 725 {
 726   /* Remove starting '/'.  */
 727   if (*directory == '/')
 728     ++directory;
 729   if (opt.includes)
 730     {
 731       if (!dir_matches_p (opt.includes, directory))
 732         return false;
 733     }
 734   if (opt.excludes)
 735     {
 736       if (dir_matches_p (opt.excludes, directory))
 737         return false;
 738     }
 739   return true;
 740 }
 741
 742 /* Return true if STRING ends with TAIL.  For instance:
 743
 744    match_tail ("abc", "bc", false)  -> 1
 745    match_tail ("abc", "ab", false)  -> 0
 746    match_tail ("abc", "abc", false) -> 1
 747
 748    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 749
 750 bool
 751 match_tail (const char *string, const char *tail, bool fold_case)
 752 {
 753   int i, j;
 754
 755   /* We want this to be fast, so we code two loops, one with
 756      case-folding, one without. */
 757
 758   if (!fold_case)
 759     {
 760       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 761         if (string[i] != tail[j])
 762           break;
 763     }
 764   else
 765     {
 766       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 767         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 768           break;
 769     }
 770
 771   /* If the tail was exhausted, the match was succesful.  */
 772   if (j == -1)
 773     return true;
 774   else
 775     return false;
 776 }
 777
 778 /* Checks whether string S matches each element of ACCEPTS.  A list
 779    element are matched either with fnmatch() or match_tail(),
 780    according to whether the element contains wildcards or not.
 781
 782    If the BACKWARD is false, don't do backward comparison -- just compare
 783    them normally.  */
 784 static bool
 785 in_acclist (const char *const *accepts, const char *s, bool backward)
 786 {
 787   for (; *accepts; accepts++)
 788     {
 789       if (has_wildcards_p (*accepts))
 790         {
 791           int res = opt.ignore_case
 792             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 793           /* fnmatch returns 0 if the pattern *does* match the string.  */
 794           if (res == 0)
 795             return true;
 796         }
 797       else
 798         {
 799           if (backward)
 800             {
 801               if (match_tail (s, *accepts, opt.ignore_case))
 802                 return true;
 803             }
 804           else
 805             {
 806               int cmp = opt.ignore_case
 807                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 808               if (cmp == 0)
 809                 return true;
 810             }
 811         }
 812     }
 813   return false;
 814 }
 815
 816 /* Return the location of STR's suffix (file extension).  Examples:
 817    suffix ("foo.bar")       -> "bar"
 818    suffix ("foo.bar.baz")   -> "baz"
 819    suffix ("/foo/bar")      -> NULL
 820    suffix ("/foo.bar/baz")  -> NULL  */
 821 char *
 822 suffix (const char *str)
 823 {
 824   int i;
 825
 826   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 827     ;
 828
 829   if (str[i++] == '.')
 830     return (char *)str + i;
 831   else
 832     return NULL;
 833 }
 834
 835 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 836    `]').  */
 837
 838 bool
 839 has_wildcards_p (const char *s)
 840 {
 841   for (; *s; s++)
 842     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 843       return true;
 844   return false;
 845 }
 846
 847 /* Return true if FNAME ends with a typical HTML suffix.  The
 848    following (case-insensitive) suffixes are presumed to be HTML
 849    files:
 850
 851      html
 852      htm
 853      ?html (`?' matches one character)
 854
 855    #### CAVEAT.  This is not necessarily a good indication that FNAME
 856    refers to a file that contains HTML!  */
 857 bool
 858 has_html_suffix_p (const char *fname)
 859 {
 860   char *suf;
 861
 862   if ((suf = suffix (fname)) == NULL)
 863     return false;
 864   if (!strcasecmp (suf, "html"))
 865     return true;
 866   if (!strcasecmp (suf, "htm"))
 867     return true;
 868   if (suf[0] && !strcasecmp (suf + 1, "html"))
 869     return true;
 870   return false;
 871 }
 872
 873 /* Read a line from FP and return the pointer to freshly allocated
 874    storage.  The storage space is obtained through malloc() and should
 875    be freed with free() when it is no longer needed.
 876
 877    The length of the line is not limited, except by available memory.
 878    The newline character at the end of line is retained.  The line is
 879    terminated with a zero character.
 880
 881    After end-of-file is encountered without anything being read, NULL
 882    is returned.  NULL is also returned on error.  To distinguish
 883    between these two cases, use the stdio function ferror().  */
 884
 885 char *
 886 read_whole_line (FILE *fp)
 887 {
 888   int length = 0;
 889   int bufsize = 82;
 890   char *line = xmalloc (bufsize);
 891
 892   while (fgets (line + length, bufsize - length, fp))
 893     {
 894       length += strlen (line + length);
 895       if (length == 0)
 896         /* Possible for example when reading from a binary file where
 897            a line begins with \0.  */
 898         continue;
 899
 900       if (line[length - 1] == '\n')
 901         break;
 902
 903       /* fgets() guarantees to read the whole line, or to use up the
 904          space we've given it.  We can double the buffer
 905          unconditionally.  */
 906       bufsize <<= 1;
 907       line = xrealloc (line, bufsize);
 908     }
 909   if (length == 0 || ferror (fp))
 910     {
 911       xfree (line);
 912       return NULL;
 913     }
 914   if (length + 1 < bufsize)
 915     /* Relieve the memory from our exponential greediness.  We say
 916        `length + 1' because the terminating \0 is not included in
 917        LENGTH.  We don't need to zero-terminate the string ourselves,
 918        though, because fgets() does that.  */
 919     line = xrealloc (line, length + 1);
 920   return line;
 921 }
 922 \f
 923 /* Read FILE into memory.  A pointer to `struct file_memory' are
 924    returned; use struct element `content' to access file contents, and
 925    the element `length' to know the file length.  `content' is *not*
 926    zero-terminated, and you should *not* read or write beyond the [0,
 927    length) range of characters.
 928
 929    After you are done with the file contents, call read_file_free to
 930    release the memory.
 931
 932    Depending on the operating system and the type of file that is
 933    being read, read_file() either mmap's the file into memory, or
 934    reads the file into the core using read().
 935
 936    If file is named "-", fileno(stdin) is used for reading instead.
 937    If you want to read from a real file named "-", use "./-" instead.  */
 938
 939 struct file_memory *
 940 read_file (const char *file)
 941 {
 942   int fd;
 943   struct file_memory *fm;
 944   long size;
 945   bool inhibit_close = false;
 946
 947   /* Some magic in the finest tradition of Perl and its kin: if FILE
 948      is "-", just use stdin.  */
 949   if (HYPHENP (file))
 950     {
 951       fd = fileno (stdin);
 952       inhibit_close = true;
 953       /* Note that we don't inhibit mmap() in this case.  If stdin is
 954          redirected from a regular file, mmap() will still work.  */
 955     }
 956   else
 957     fd = open (file, O_RDONLY);
 958   if (fd < 0)
 959     return NULL;
 960   fm = xnew (struct file_memory);
 961
 962 #ifdef HAVE_MMAP
 963   {
 964     struct_fstat buf;
 965     if (fstat (fd, &buf) < 0)
 966       goto mmap_lose;
 967     fm->length = buf.st_size;
 968     /* NOTE: As far as I know, the callers of this function never
 969        modify the file text.  Relying on this would enable us to
 970        specify PROT_READ and MAP_SHARED for a marginal gain in
 971        efficiency, but at some cost to generality.  */
 972     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 973                         MAP_PRIVATE, fd, 0);
 974     if (fm->content == (char *)MAP_FAILED)
 975       goto mmap_lose;
 976     if (!inhibit_close)
 977       close (fd);
 978
 979     fm->mmap_p = 1;
 980     return fm;
 981   }
 982
 983  mmap_lose:
 984   /* The most common reason why mmap() fails is that FD does not point
 985      to a plain file.  However, it's also possible that mmap() doesn't
 986      work for a particular type of file.  Therefore, whenever mmap()
 987      fails, we just fall back to the regular method.  */
 988 #endif /* HAVE_MMAP */
 989
 990   fm->length = 0;
 991   size = 512;                   /* number of bytes fm->contents can
 992                                    hold at any given time. */
 993   fm->content = xmalloc (size);
 994   while (1)
 995     {
 996       wgint nread;
 997       if (fm->length > size / 2)
 998         {
 999           /* #### I'm not sure whether the whole exponential-growth
1000              thing makes sense with kernel read.  On Linux at least,
1001              read() refuses to read more than 4K from a file at a
1002              single chunk anyway.  But other Unixes might optimize it
1003              better, and it doesn't *hurt* anything, so I'm leaving
1004              it.  */
1005
1006           /* Normally, we grow SIZE exponentially to make the number
1007              of calls to read() and realloc() logarithmic in relation
1008              to file size.  However, read() can read an amount of data
1009              smaller than requested, and it would be unreasonable to
1010              double SIZE every time *something* was read.  Therefore,
1011              we double SIZE only when the length exceeds half of the
1012              entire allocated size.  */
1013           size <<= 1;
1014           fm->content = xrealloc (fm->content, size);
1015         }
1016       nread = read (fd, fm->content + fm->length, size - fm->length);
1017       if (nread > 0)
1018         /* Successful read. */
1019         fm->length += nread;
1020       else if (nread < 0)
1021         /* Error. */
1022         goto lose;
1023       else
1024         /* EOF */
1025         break;
1026     }
1027   if (!inhibit_close)
1028     close (fd);
1029   if (size > fm->length && fm->length != 0)
1030     /* Due to exponential growth of fm->content, the allocated region
1031        might be much larger than what is actually needed.  */
1032     fm->content = xrealloc (fm->content, fm->length);
1033   fm->mmap_p = 0;
1034   return fm;
1035
1036  lose:
1037   if (!inhibit_close)
1038     close (fd);
1039   xfree (fm->content);
1040   xfree (fm);
1041   return NULL;
1042 }
1043
1044 /* Release the resources held by FM.  Specifically, this calls
1045    munmap() or xfree() on fm->content, depending whether mmap or
1046    malloc/read were used to read in the file.  It also frees the
1047    memory needed to hold the FM structure itself.  */
1048
1049 void
1050 read_file_free (struct file_memory *fm)
1051 {
1052 #ifdef HAVE_MMAP
1053   if (fm->mmap_p)
1054     {
1055       munmap (fm->content, fm->length);
1056     }
1057   else
1058 #endif
1059     {
1060       xfree (fm->content);
1061     }
1062   xfree (fm);
1063 }
1064 \f
1065 /* Free the pointers in a NULL-terminated vector of pointers, then
1066    free the pointer itself.  */
1067 void
1068 free_vec (char **vec)
1069 {
1070   if (vec)
1071     {
1072       char **p = vec;
1073       while (*p)
1074         xfree (*p++);
1075       xfree (vec);
1076     }
1077 }
1078
1079 /* Append vector V2 to vector V1.  The function frees V2 and
1080    reallocates V1 (thus you may not use the contents of neither
1081    pointer after the call).  If V1 is NULL, V2 is returned.  */
1082 char **
1083 merge_vecs (char **v1, char **v2)
1084 {
1085   int i, j;
1086
1087   if (!v1)
1088     return v2;
1089   if (!v2)
1090     return v1;
1091   if (!*v2)
1092     {
1093       /* To avoid j == 0 */
1094       xfree (v2);
1095       return v1;
1096     }
1097   /* Count v1.  */
1098   for (i = 0; v1[i]; i++)
1099     ;
1100   /* Count v2.  */
1101   for (j = 0; v2[j]; j++)
1102     ;
1103   /* Reallocate v1.  */
1104   v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1105   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1106   xfree (v2);
1107   return v1;
1108 }
1109
1110 /* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1111    is allocated as needed.  Return the new value of the vector. */
1112
1113 char **
1114 vec_append (char **vec, const char *str)
1115 {
1116   int cnt;                      /* count of vector elements, including
1117                                    the one we're about to append */
1118   if (vec != NULL)
1119     {
1120       for (cnt = 0; vec[cnt]; cnt++)
1121         ;
1122       ++cnt;
1123     }
1124   else
1125     cnt = 1;
1126   /* Reallocate the array to fit the new element and the NULL. */
1127   vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1128   /* Append a copy of STR to the vector. */
1129   vec[cnt - 1] = xstrdup (str);
1130   vec[cnt] = NULL;
1131   return vec;
1132 }
1133 \f
1134 /* Sometimes it's useful to create "sets" of strings, i.e. special
1135    hash tables where you want to store strings as keys and merely
1136    query for their existence.  Here is a set of utility routines that
1137    makes that transparent.  */
1138
1139 void
1140 string_set_add (struct hash_table *ht, const char *s)
1141 {
1142   /* First check whether the set element already exists.  If it does,
1143      do nothing so that we don't have to free() the old element and
1144      then strdup() a new one.  */
1145   if (hash_table_contains (ht, s))
1146     return;
1147
1148   /* We use "1" as value.  It provides us a useful and clear arbitrary
1149      value, and it consumes no memory -- the pointers to the same
1150      string "1" will be shared by all the key-value pairs in all `set'
1151      hash tables.  */
1152   hash_table_put (ht, xstrdup (s), "1");
1153 }
1154
1155 /* Synonym for hash_table_contains... */
1156
1157 int
1158 string_set_contains (struct hash_table *ht, const char *s)
1159 {
1160   return hash_table_contains (ht, s);
1161 }
1162
1163 /* Convert the specified string set to array.  ARRAY should be large
1164    enough to hold hash_table_count(ht) char pointers.  */
1165
1166 void string_set_to_array (struct hash_table *ht, char **array)
1167 {
1168   hash_table_iterator iter;
1169   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1170     *array++ = iter.key;
1171 }
1172
1173 /* Free the string set.  This frees both the storage allocated for
1174    keys and the actual hash table.  (hash_table_destroy would only
1175    destroy the hash table.)  */
1176
1177 void
1178 string_set_free (struct hash_table *ht)
1179 {
1180   hash_table_iterator iter;
1181   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1182     xfree (iter.key);
1183   hash_table_destroy (ht);
1184 }
1185
1186 /* Utility function: simply call xfree() on all keys and values of HT.  */
1187
1188 void
1189 free_keys_and_values (struct hash_table *ht)
1190 {
1191   hash_table_iterator iter;
1192   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1193     {
1194       xfree (iter.key);
1195       xfree (iter.value);
1196     }
1197 }
1198 \f
1199 /* Get digit grouping data for thousand separors by calling
1200    localeconv().  The data includes separator string and grouping info
1201    and is cached after the first call to the function.
1202
1203    In locales that don't set a thousand separator (such as the "C"
1204    locale), this forces it to be ",".  We are now only showing
1205    thousand separators in one place, so this shouldn't be a problem in
1206    practice.  */
1207
1208 static void
1209 get_grouping_data (const char **sep, const char **grouping)
1210 {
1211   static const char *cached_sep;
1212   static const char *cached_grouping;
1213   static bool initialized;
1214   if (!initialized)
1215     {
1216       /* Get the grouping info from the locale. */
1217       struct lconv *lconv = localeconv ();
1218       cached_sep = lconv->thousands_sep;
1219       cached_grouping = lconv->grouping;
1220       if (!*cached_sep)
1221         {
1222           /* Many locales (such as "C" or "hr_HR") don't specify
1223              grouping, which we still want to use it for legibility.
1224              In those locales set the sep char to ',', unless that
1225              character is used for decimal point, in which case set it
1226              to ".".  */
1227           if (*lconv->decimal_point != ',')
1228             cached_sep = ",";
1229           else
1230             cached_sep = ".";
1231           cached_grouping = "\x03";
1232         }
1233       initialized = true;
1234     }
1235   *sep = cached_sep;
1236   *grouping = cached_grouping;
1237 }
1238
1239 /* Return a printed representation of N with thousand separators.
1240    This should respect locale settings, with the exception of the "C"
1241    locale which mandates no separator, but we use one anyway.
1242
1243    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1244    the separators because it's too non-portable, and it's hard to test
1245    for this feature at configure time.  Besides, it wouldn't display
1246    separators in the "C" locale, still used by many Unix users.  */
1247
1248 const char *
1249 with_thousand_seps (wgint n)
1250 {
1251   static char outbuf[48];
1252   char *p = outbuf + sizeof outbuf;
1253
1254   /* Info received from locale */
1255   const char *grouping, *sep;
1256   int seplen;
1257
1258   /* State information */
1259   int i = 0, groupsize;
1260   const char *atgroup;
1261
1262   bool negative = n < 0;
1263
1264   /* Initialize grouping data. */
1265   get_grouping_data (&sep, &grouping);
1266   seplen = strlen (sep);
1267   atgroup = grouping;
1268   groupsize = *atgroup++;
1269
1270   /* This would overflow on WGINT_MIN, but printing negative numbers
1271      is not an important goal of this fuinction.  */
1272   if (negative)
1273     n = -n;
1274
1275   /* Write the number into the buffer, backwards, inserting the
1276      separators as necessary.  */
1277   *--p = '\0';
1278   while (1)
1279     {
1280       *--p = n % 10 + '0';
1281       n /= 10;
1282       if (n == 0)
1283         break;
1284       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1285       if (++i == groupsize)
1286         {
1287           if (seplen == 1)
1288             *--p = *sep;
1289           else
1290             memcpy (p -= seplen, sep, seplen);
1291           i = 0;
1292           if (*atgroup)
1293             groupsize = *atgroup++;
1294         }
1295     }
1296   if (negative)
1297     *--p = '-';
1298
1299   return p;
1300 }
1301
1302 /* N, a byte quantity, is converted to a human-readable abberviated
1303    form a la sizes printed by `ls -lh'.  The result is written to a
1304    static buffer, a pointer to which is returned.
1305
1306    Unlike `with_thousand_seps', this approximates to the nearest unit.
1307    Quoting GNU libit: "Most people visually process strings of 3-4
1308    digits effectively, but longer strings of digits are more prone to
1309    misinterpretation.  Hence, converting to an abbreviated form
1310    usually improves readability."
1311
1312    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1313    original computer-related meaning of "powers of 1024".  We don't
1314    use the "*bibyte" names invented in 1998, and seldom used in
1315    practice.  Wikipedia's entry on "binary prefix" discusses this in
1316    some detail.  */
1317
1318 char *
1319 human_readable (HR_NUMTYPE n)
1320 {
1321   /* These suffixes are compatible with those of GNU `ls -lh'. */
1322   static char powers[] =
1323     {
1324       'K',                      /* kilobyte, 2^10 bytes */
1325       'M',                      /* megabyte, 2^20 bytes */
1326       'G',                      /* gigabyte, 2^30 bytes */
1327       'T',                      /* terabyte, 2^40 bytes */
1328       'P',                      /* petabyte, 2^50 bytes */
1329       'E',                      /* exabyte,  2^60 bytes */
1330     };
1331   static char buf[8];
1332   int i;
1333
1334   /* If the quantity is smaller than 1K, just print it. */
1335   if (n < 1024)
1336     {
1337       snprintf (buf, sizeof (buf), "%d", (int) n);
1338       return buf;
1339     }
1340
1341   /* Loop over powers, dividing N with 1024 in each iteration.  This
1342      works unchanged for all sizes of wgint, while still avoiding
1343      non-portable `long double' arithmetic.  */
1344   for (i = 0; i < countof (powers); i++)
1345     {
1346       /* At each iteration N is greater than the *subsequent* power.
1347          That way N/1024.0 produces a decimal number in the units of
1348          *this* power.  */
1349       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1350         {
1351           double val = n / 1024.0;
1352           /* Print values smaller than 10 with one decimal digits, and
1353              others without any decimals.  */
1354           snprintf (buf, sizeof (buf), "%.*f%c",
1355                     val < 10 ? 1 : 0, val, powers[i]);
1356           return buf;
1357         }
1358       n /= 1024;
1359     }
1360   return NULL;                  /* unreached */
1361 }
1362
1363 /* Count the digits in the provided number.  Used to allocate space
1364    when printing numbers.  */
1365
1366 int
1367 numdigit (wgint number)
1368 {
1369   int cnt = 1;
1370   if (number < 0)
1371     ++cnt;                      /* accomodate '-' */
1372   while ((number /= 10) != 0)
1373     ++cnt;
1374   return cnt;
1375 }
1376
1377 #define PR(mask) *p++ = n / (mask) + '0'
1378
1379 /* DIGITS_<D> is used to print a D-digit number and should be called
1380    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1381    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1382    Recursively this continues until DIGITS_1 is invoked.  */
1383
1384 #define DIGITS_1(mask) PR (mask)
1385 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1386 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1387 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1388 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1389 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1390 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1391 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1392 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1393 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1394
1395 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1396
1397 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1398 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1399 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1400 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1401 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1402 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1403 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1404 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1405 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1406
1407 /* Shorthand for casting to wgint. */
1408 #define W wgint
1409
1410 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1411    `sprintf(buffer, "%lld", (long long) number)', only typically much
1412    faster and portable to machines without long long.
1413
1414    The speedup may make a difference in programs that frequently
1415    convert numbers to strings.  Some implementations of sprintf,
1416    particularly the one in some versions of GNU libc, have been known
1417    to be quite slow when converting integers to strings.
1418
1419    Return the pointer to the location where the terminating zero was
1420    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1421    function is done.)
1422
1423    BUFFER should be large enough to accept as many bytes as you expect
1424    the number to take up.  On machines with 64-bit wgints the maximum
1425    needed size is 24 bytes.  That includes the digits needed for the
1426    largest 64-bit number, the `-' sign in case it's negative, and the
1427    terminating '\0'.  */
1428
1429 char *
1430 number_to_string (char *buffer, wgint number)
1431 {
1432   char *p = buffer;
1433   wgint n = number;
1434
1435   int last_digit_char = 0;
1436
1437 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1438   /* We are running in a very strange environment.  Leave the correct
1439      printing to sprintf.  */
1440   p += sprintf (buf, "%j", (intmax_t) (n));
1441 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1442
1443   if (n < 0)
1444     {
1445       if (n < -WGINT_MAX)
1446         {
1447           /* n = -n would overflow because -n would evaluate to a
1448              wgint value larger than WGINT_MAX.  Need to make n
1449              smaller and handle the last digit separately.  */
1450           int last_digit = n % 10;
1451           /* The sign of n%10 is implementation-defined. */
1452           if (last_digit < 0)
1453             last_digit_char = '0' - last_digit;
1454           else
1455             last_digit_char = '0' + last_digit;
1456           /* After n is made smaller, -n will not overflow. */
1457           n /= 10;
1458         }
1459
1460       *p++ = '-';
1461       n = -n;
1462     }
1463
1464   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1465      way printing any N is fully open-coded without a loop or jump.
1466      (Also see description of DIGITS_*.)  */
1467
1468   if      (n < 10)                       DIGITS_1 (1);
1469   else if (n < 100)                      DIGITS_2 (10);
1470   else if (n < 1000)                     DIGITS_3 (100);
1471   else if (n < 10000)                    DIGITS_4 (1000);
1472   else if (n < 100000)                   DIGITS_5 (10000);
1473   else if (n < 1000000)                  DIGITS_6 (100000);
1474   else if (n < 10000000)                 DIGITS_7 (1000000);
1475   else if (n < 100000000)                DIGITS_8 (10000000);
1476   else if (n < 1000000000)               DIGITS_9 (100000000);
1477 #if SIZEOF_WGINT == 4
1478   /* wgint is 32 bits wide: no number has more than 10 digits. */
1479   else                                   DIGITS_10 (1000000000);
1480 #else
1481   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1482      Constants are constructed by compile-time multiplication to avoid
1483      dealing with different notations for 64-bit constants
1484      (nL/nLL/nI64, depending on the compiler and architecture).  */
1485   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1486   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1487   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1488   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1489   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1490   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1491   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1492   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1493   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1494   else                                   DIGITS_19 (1000000000*(W)1000000000);
1495 #endif
1496
1497   if (last_digit_char)
1498     *p++ = last_digit_char;
1499
1500   *p = '\0';
1501 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1502
1503   return p;
1504 }
1505
1506 #undef PR
1507 #undef W
1508 #undef SPRINTF_WGINT
1509 #undef DIGITS_1
1510 #undef DIGITS_2
1511 #undef DIGITS_3
1512 #undef DIGITS_4
1513 #undef DIGITS_5
1514 #undef DIGITS_6
1515 #undef DIGITS_7
1516 #undef DIGITS_8
1517 #undef DIGITS_9
1518 #undef DIGITS_10
1519 #undef DIGITS_11
1520 #undef DIGITS_12
1521 #undef DIGITS_13
1522 #undef DIGITS_14
1523 #undef DIGITS_15
1524 #undef DIGITS_16
1525 #undef DIGITS_17
1526 #undef DIGITS_18
1527 #undef DIGITS_19
1528
1529 #define RING_SIZE 3
1530
1531 /* Print NUMBER to a statically allocated string and return a pointer
1532    to the printed representation.
1533
1534    This function is intended to be used in conjunction with printf.
1535    It is hard to portably print wgint values:
1536     a) you cannot use printf("%ld", number) because wgint can be long
1537        long on 32-bit machines with LFS.
1538     b) you cannot use printf("%lld", number) because NUMBER could be
1539        long on 32-bit machines without LFS, or on 64-bit machines,
1540        which do not require LFS.  Also, Windows doesn't support %lld.
1541     c) you cannot use printf("%j", (int_max_t) number) because not all
1542        versions of printf support "%j", the most notable being the one
1543        on Windows.
1544     d) you cannot #define WGINT_FMT to the appropriate format and use
1545        printf(WGINT_FMT, number) because that would break translations
1546        for user-visible messages, such as printf("Downloaded: %d
1547        bytes\n", number).
1548
1549    What you should use instead is printf("%s", number_to_static_string
1550    (number)).
1551
1552    CAVEAT: since the function returns pointers to static data, you
1553    must be careful to copy its result before calling it again.
1554    However, to make it more useful with printf, the function maintains
1555    an internal ring of static buffers to return.  That way things like
1556    printf("%s %s", number_to_static_string (num1),
1557    number_to_static_string (num2)) work as expected.  Three buffers
1558    are currently used, which means that "%s %s %s" will work, but "%s
1559    %s %s %s" won't.  If you need to print more than three wgints,
1560    bump the RING_SIZE (or rethink your message.)  */
1561
1562 char *
1563 number_to_static_string (wgint number)
1564 {
1565   static char ring[RING_SIZE][24];
1566   static int ringpos;
1567   char *buf = ring[ringpos];
1568   number_to_string (buf, number);
1569   ringpos = (ringpos + 1) % RING_SIZE;
1570   return buf;
1571 }
1572 \f
1573 /* Determine the width of the terminal we're running on.  If that's
1574    not possible, return 0.  */
1575
1576 int
1577 determine_screen_width (void)
1578 {
1579   /* If there's a way to get the terminal size using POSIX
1580      tcgetattr(), somebody please tell me.  */
1581 #ifdef TIOCGWINSZ
1582   int fd;
1583   struct winsize wsz;
1584
1585   if (opt.lfilename != NULL)
1586     return 0;
1587
1588   fd = fileno (stderr);
1589   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1590     return 0;                   /* most likely ENOTTY */
1591
1592   return wsz.ws_col;
1593 #elif defined(WINDOWS)
1594   CONSOLE_SCREEN_BUFFER_INFO csbi;
1595   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1596     return 0;
1597   return csbi.dwSize.X;
1598 #else  /* neither TIOCGWINSZ nor WINDOWS */
1599   return 0;
1600 #endif /* neither TIOCGWINSZ nor WINDOWS */
1601 }
1602 \f
1603 /* Whether the rnd system (either rand or [dl]rand48) has been
1604    seeded.  */
1605 static int rnd_seeded;
1606
1607 /* Return a random number between 0 and MAX-1, inclusive.
1608
1609    If the system does not support lrand48 and MAX is greater than the
1610    value of RAND_MAX+1 on the system, the returned value will be in
1611    the range [0, RAND_MAX].  This may be fixed in a future release.
1612    The random number generator is seeded automatically the first time
1613    it is called.
1614
1615    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1616    for cryptography.  It is only meant to be used in situations where
1617    quality of the random numbers returned doesn't really matter.  */
1618
1619 int
1620 random_number (int max)
1621 {
1622 #ifdef HAVE_DRAND48
1623   if (!rnd_seeded)
1624     {
1625       srand48 ((long) time (NULL) ^ (long) getpid ());
1626       rnd_seeded = 1;
1627     }
1628   return lrand48 () % max;
1629 #else  /* not HAVE_DRAND48 */
1630
1631   double bounded;
1632   int rnd;
1633   if (!rnd_seeded)
1634     {
1635       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1636       rnd_seeded = 1;
1637     }
1638   rnd = rand ();
1639
1640   /* Like rand() % max, but uses the high-order bits for better
1641      randomness on architectures where rand() is implemented using a
1642      simple congruential generator.  */
1643
1644   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1645   return (int) bounded;
1646
1647 #endif /* not HAVE_DRAND48 */
1648 }
1649
1650 /* Return a random uniformly distributed floating point number in the
1651    [0, 1) range.  Uses drand48 where available, and a really lame
1652    kludge elsewhere.  */
1653
1654 double
1655 random_float (void)
1656 {
1657 #ifdef HAVE_DRAND48
1658   if (!rnd_seeded)
1659     {
1660       srand48 ((long) time (NULL) ^ (long) getpid ());
1661       rnd_seeded = 1;
1662     }
1663   return drand48 ();
1664 #else  /* not HAVE_DRAND48 */
1665   return (  random_number (10000) / 10000.0
1666           + random_number (10000) / (10000.0 * 10000.0)
1667           + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1668           + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1669 #endif /* not HAVE_DRAND48 */
1670 }
1671 \f
1672 /* Implementation of run_with_timeout, a generic timeout-forcing
1673    routine for systems with Unix-like signal handling.  */
1674
1675 #ifdef USE_SIGNAL_TIMEOUT
1676 # ifdef HAVE_SIGSETJMP
1677 #  define SETJMP(env) sigsetjmp (env, 1)
1678
1679 static sigjmp_buf run_with_timeout_env;
1680
1681 static void
1682 abort_run_with_timeout (int sig)
1683 {
1684   assert (sig == SIGALRM);
1685   siglongjmp (run_with_timeout_env, -1);
1686 }
1687 # else /* not HAVE_SIGSETJMP */
1688 #  define SETJMP(env) setjmp (env)
1689
1690 static jmp_buf run_with_timeout_env;
1691
1692 static void
1693 abort_run_with_timeout (int sig)
1694 {
1695   assert (sig == SIGALRM);
1696   /* We don't have siglongjmp to preserve the set of blocked signals;
1697      if we longjumped out of the handler at this point, SIGALRM would
1698      remain blocked.  We must unblock it manually. */
1699   int mask = siggetmask ();
1700   mask &= ~sigmask (SIGALRM);
1701   sigsetmask (mask);
1702
1703   /* Now it's safe to longjump. */
1704   longjmp (run_with_timeout_env, -1);
1705 }
1706 # endif /* not HAVE_SIGSETJMP */
1707
1708 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1709    setitimer where available, alarm otherwise.
1710
1711    TIMEOUT should be non-zero.  If the timeout value is so small that
1712    it would be rounded to zero, it is rounded to the least legal value
1713    instead (1us for setitimer, 1s for alarm).  That ensures that
1714    SIGALRM will be delivered in all cases.  */
1715
1716 static void
1717 alarm_set (double timeout)
1718 {
1719 #ifdef ITIMER_REAL
1720   /* Use the modern itimer interface. */
1721   struct itimerval itv;
1722   xzero (itv);
1723   itv.it_value.tv_sec = (long) timeout;
1724   itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1725   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1726     /* Ensure that we wait for at least the minimum interval.
1727        Specifying zero would mean "wait forever".  */
1728     itv.it_value.tv_usec = 1;
1729   setitimer (ITIMER_REAL, &itv, NULL);
1730 #else  /* not ITIMER_REAL */
1731   /* Use the old alarm() interface. */
1732   int secs = (int) timeout;
1733   if (secs == 0)
1734     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1735        because alarm(0) means "never deliver the alarm", i.e. "wait
1736        forever", which is not what someone who specifies a 0.5s
1737        timeout would expect.  */
1738     secs = 1;
1739   alarm (secs);
1740 #endif /* not ITIMER_REAL */
1741 }
1742
1743 /* Cancel the alarm set with alarm_set. */
1744
1745 static void
1746 alarm_cancel (void)
1747 {
1748 #ifdef ITIMER_REAL
1749   struct itimerval disable;
1750   xzero (disable);
1751   setitimer (ITIMER_REAL, &disable, NULL);
1752 #else  /* not ITIMER_REAL */
1753   alarm (0);
1754 #endif /* not ITIMER_REAL */
1755 }
1756
1757 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1758    seconds.  Returns true if the function was interrupted with a
1759    timeout, false otherwise.
1760
1761    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1762    using setitimer() or alarm().  The timeout is enforced by
1763    longjumping out of the SIGALRM handler.  This has several
1764    advantages compared to the traditional approach of relying on
1765    signals causing system calls to exit with EINTR:
1766
1767      * The callback function is *forcibly* interrupted after the
1768        timeout expires, (almost) regardless of what it was doing and
1769        whether it was in a syscall.  For example, a calculation that
1770        takes a long time is interrupted as reliably as an IO
1771        operation.
1772
1773      * It works with both SYSV and BSD signals because it doesn't
1774        depend on the default setting of SA_RESTART.
1775
1776      * It doesn't require special handler setup beyond a simple call
1777        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1778        optional.)
1779
1780    The only downside is that, if FUN allocates internal resources that
1781    are normally freed prior to exit from the functions, they will be
1782    lost in case of timeout.  */
1783
1784 bool
1785 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1786 {
1787   int saved_errno;
1788
1789   if (timeout == 0)
1790     {
1791       fun (arg);
1792       return false;
1793     }
1794
1795   signal (SIGALRM, abort_run_with_timeout);
1796   if (SETJMP (run_with_timeout_env) != 0)
1797     {
1798       /* Longjumped out of FUN with a timeout. */
1799       signal (SIGALRM, SIG_DFL);
1800       return true;
1801     }
1802   alarm_set (timeout);
1803   fun (arg);
1804
1805   /* Preserve errno in case alarm() or signal() modifies it. */
1806   saved_errno = errno;
1807   alarm_cancel ();
1808   signal (SIGALRM, SIG_DFL);
1809   errno = saved_errno;
1810
1811   return false;
1812 }
1813
1814 #else  /* not USE_SIGNAL_TIMEOUT */
1815
1816 #ifndef WINDOWS
1817 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
1818    define it under Windows, because Windows has its own version of
1819    run_with_timeout that uses threads.  */
1820
1821 bool
1822 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1823 {
1824   fun (arg);
1825   return false;
1826 }
1827 #endif /* not WINDOWS */
1828 #endif /* not USE_SIGNAL_TIMEOUT */
1829 \f
1830 #ifndef WINDOWS
1831
1832 /* Sleep the specified amount of seconds.  On machines without
1833    nanosleep(), this may sleep shorter if interrupted by signals.  */
1834
1835 void
1836 xsleep (double seconds)
1837 {
1838 #ifdef HAVE_NANOSLEEP
1839   /* nanosleep is the preferred interface because it offers high
1840      accuracy and, more importantly, because it allows us to reliably
1841      restart receiving a signal such as SIGWINCH.  (There was an
1842      actual Debian bug report about --limit-rate malfunctioning while
1843      the terminal was being resized.)  */
1844   struct timespec sleep, remaining;
1845   sleep.tv_sec = (long) seconds;
1846   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1847   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1848     /* If nanosleep has been interrupted by a signal, adjust the
1849        sleeping period and return to sleep.  */
1850     sleep = remaining;
1851 #elif defined(HAVE_USLEEP)
1852   /* If usleep is available, use it in preference to select.  */
1853   if (seconds >= 1)
1854     {
1855       /* On some systems, usleep cannot handle values larger than
1856          1,000,000.  If the period is larger than that, use sleep
1857          first, then add usleep for subsecond accuracy.  */
1858       sleep (seconds);
1859       seconds -= (long) seconds;
1860     }
1861   usleep (seconds * 1000000);
1862 #else /* fall back select */
1863   /* Note that, although Windows supports select, it can't be used to
1864      implement sleeping because Winsock's select doesn't implement
1865      timeout when it is passed NULL pointers for all fd sets.  (But it
1866      does under Cygwin, which implements Unix-compatible select.)  */
1867   struct timeval sleep;
1868   sleep.tv_sec = (long) seconds;
1869   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
1870   select (0, NULL, NULL, NULL, &sleep);
1871   /* If select returns -1 and errno is EINTR, it means we were
1872      interrupted by a signal.  But without knowing how long we've
1873      actually slept, we can't return to sleep.  Using gettimeofday to
1874      track sleeps is slow and unreliable due to clock skew.  */
1875 #endif
1876 }
1877
1878 #endif /* not WINDOWS */
1879
1880 /* Encode the octets in DATA of length LENGTH to base64 format,
1881    storing the result to DEST.  The output will be zero-terminated,
1882    and must point to a writable buffer of at least
1883    1+BASE64_LENGTH(length) bytes.  The function returns the length of
1884    the resulting base64 data, not counting the terminating zero.
1885
1886    This implementation does not emit newlines after 76 characters of
1887    base64 data.  */
1888
1889 int
1890 base64_encode (const void *data, int length, char *dest)
1891 {
1892   /* Conversion table.  */
1893   static const char tbl[64] = {
1894     'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
1895     'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
1896     'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
1897     'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
1898   };
1899   /* Access bytes in DATA as unsigned char, otherwise the shifts below
1900      don't work for data with MSB set. */
1901   const unsigned char *s = data;
1902   /* Theoretical ANSI violation when length < 3. */
1903   const unsigned char *end = (const unsigned char *) data + length - 2;
1904   char *p = dest;
1905
1906   /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
1907   for (; s < end; s += 3)
1908     {
1909       *p++ = tbl[s[0] >> 2];
1910       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1911       *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
1912       *p++ = tbl[s[2] & 0x3f];
1913     }
1914
1915   /* Pad the result if necessary...  */
1916   switch (length % 3)
1917     {
1918     case 1:
1919       *p++ = tbl[s[0] >> 2];
1920       *p++ = tbl[(s[0] & 3) << 4];
1921       *p++ = '=';
1922       *p++ = '=';
1923       break;
1924     case 2:
1925       *p++ = tbl[s[0] >> 2];
1926       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1927       *p++ = tbl[((s[1] & 0xf) << 2)];
1928       *p++ = '=';
1929       break;
1930     }
1931   /* ...and zero-terminate it.  */
1932   *p = '\0';
1933
1934   return p - dest;
1935 }
1936
1937 /* Store in C the next non-whitespace character from the string, or \0
1938    when end of string is reached.  */
1939 #define NEXT_CHAR(c, p) do {                    \
1940   c = (unsigned char) *p++;                     \
1941 } while (ISSPACE (c))
1942
1943 #define IS_ASCII(c) (((c) & 0x80) == 0)
1944
1945 /* Decode data from BASE64 (a null-terminated string) into memory
1946    pointed to by DEST.  DEST is assumed to be large enough to
1947    accomodate the decoded data, which is guaranteed to be no more than
1948    3/4*strlen(base64).
1949
1950    Since DEST is assumed to contain binary data, it is not
1951    NUL-terminated.  The function returns the length of the data
1952    written to TO.  -1 is returned in case of error caused by malformed
1953    base64 input.
1954
1955    This function originates from Free Recode.  */
1956
1957 int
1958 base64_decode (const char *base64, void *dest)
1959 {
1960   /* Table of base64 values for first 128 characters.  Note that this
1961      assumes ASCII (but so does Wget in other places).  */
1962   static const signed char base64_char_to_value[128] =
1963     {
1964       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
1965       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
1966       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
1967       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
1968       -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
1969       54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
1970       -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
1971       5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
1972       15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
1973       25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
1974       29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
1975       39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
1976       49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
1977     };
1978 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
1979 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
1980
1981   const char *p = base64;
1982   char *q = dest;
1983
1984   while (1)
1985     {
1986       unsigned char c;
1987       unsigned long value;
1988
1989       /* Process first byte of a quadruplet.  */
1990       NEXT_CHAR (c, p);
1991       if (!c)
1992         break;
1993       if (c == '=' || !IS_BASE64 (c))
1994         return -1;              /* illegal char while decoding base64 */
1995       value = BASE64_CHAR_TO_VALUE (c) << 18;
1996
1997       /* Process second byte of a quadruplet.  */
1998       NEXT_CHAR (c, p);
1999       if (!c)
2000         return -1;              /* premature EOF while decoding base64 */
2001       if (c == '=' || !IS_BASE64 (c))
2002         return -1;              /* illegal char while decoding base64 */
2003       value |= BASE64_CHAR_TO_VALUE (c) << 12;
2004       *q++ = value >> 16;
2005
2006       /* Process third byte of a quadruplet.  */
2007       NEXT_CHAR (c, p);
2008       if (!c)
2009         return -1;              /* premature EOF while decoding base64 */
2010       if (!IS_BASE64 (c))
2011         return -1;              /* illegal char while decoding base64 */
2012
2013       if (c == '=')
2014         {
2015           NEXT_CHAR (c, p);
2016           if (!c)
2017             return -1;          /* premature EOF while decoding base64 */
2018           if (c != '=')
2019             return -1;          /* padding `=' expected but not found */
2020           continue;
2021         }
2022
2023       value |= BASE64_CHAR_TO_VALUE (c) << 6;
2024       *q++ = 0xff & value >> 8;
2025
2026       /* Process fourth byte of a quadruplet.  */
2027       NEXT_CHAR (c, p);
2028       if (!c)
2029         return -1;              /* premature EOF while decoding base64 */
2030       if (c == '=')
2031         continue;
2032       if (!IS_BASE64 (c))
2033         return -1;              /* illegal char while decoding base64 */
2034
2035       value |= BASE64_CHAR_TO_VALUE (c);
2036       *q++ = 0xff & value;
2037     }
2038 #undef IS_BASE64
2039 #undef BASE64_CHAR_TO_VALUE
2040
2041   return q - (char *) dest;
2042 }
2043
2044 #undef IS_ASCII
2045 #undef NEXT_CHAR
2046 \f
2047 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2048    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2049
2050 static void
2051 mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2052                     int (*cmpfun) (const void *, const void *))
2053 {
2054 #define ELT(array, pos) ((char *)(array) + (pos) * size)
2055   if (from < to)
2056     {
2057       size_t i, j, k;
2058       size_t mid = (to + from) / 2;
2059       mergesort_internal (base, temp, size, from, mid, cmpfun);
2060       mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2061       i = from;
2062       j = mid + 1;
2063       for (k = from; (i <= mid) && (j <= to); k++)
2064         if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2065           memcpy (ELT (temp, k), ELT (base, i++), size);
2066         else
2067           memcpy (ELT (temp, k), ELT (base, j++), size);
2068       while (i <= mid)
2069         memcpy (ELT (temp, k++), ELT (base, i++), size);
2070       while (j <= to)
2071         memcpy (ELT (temp, k++), ELT (base, j++), size);
2072       for (k = from; k <= to; k++)
2073         memcpy (ELT (base, k), ELT (temp, k), size);
2074     }
2075 #undef ELT
2076 }
2077
2078 /* Stable sort with interface exactly like standard library's qsort.
2079    Uses mergesort internally, allocating temporary storage with
2080    alloca.  */
2081
2082 void
2083 stable_sort (void *base, size_t nmemb, size_t size,
2084              int (*cmpfun) (const void *, const void *))
2085 {
2086   if (size > 1)
2087     {
2088       void *temp = alloca (nmemb * size * sizeof (void *));
2089       mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2090     }
2091 }
2092 \f
2093 /* Print a decimal number.  If it is equal to or larger than ten, the
2094    number is rounded.  Otherwise it is printed with one significant
2095    digit without trailing zeros and with no more than three fractional
2096    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2097    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2098
2099    This is useful for displaying durations because it provides
2100    order-of-magnitude information without unnecessary clutter --
2101    long-running downloads are shown without the fractional part, and
2102    short ones still retain one significant digit.  */
2103
2104 const char *
2105 print_decimal (double number)
2106 {
2107   static char buf[32];
2108   double n = number >= 0 ? number : -number;
2109
2110   if (n >= 9.95)
2111     /* Cut off at 9.95 because the below %.1f would round 9.96 to
2112        "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2113     snprintf (buf, sizeof buf, "%.0f", number);
2114   else if (n >= 0.95)
2115     snprintf (buf, sizeof buf, "%.1f", number);
2116   else if (n >= 0.001)
2117     snprintf (buf, sizeof buf, "%.1g", number);
2118   else if (n >= 0.0005)
2119     /* round [0.0005, 0.001) to 0.001 */
2120     snprintf (buf, sizeof buf, "%.3f", number);
2121   else
2122     /* print numbers close to 0 as 0, not 0.000 */
2123     strcpy (buf, "0");
2124
2125   return buf;
2126 }
2127
2128 #ifdef TESTING
2129
2130 const char *
2131 test_subdir_p()
2132 {
2133   int i;
2134   struct {
2135     char *d1;
2136     char *d2;
2137     bool result;
2138   } test_array[] = {
2139     { "/somedir", "/somedir", true },
2140     { "/somedir", "/somedir/d2", true },
2141     { "/somedir/d1", "/somedir", false },
2142   };
2143
2144   for (i = 0; i < countof(test_array); ++i)
2145     {
2146       bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2147
2148       mu_assert ("test_subdir_p: wrong result",
2149                  res == test_array[i].result);
2150     }
2151
2152   return NULL;
2153 }
2154
2155 const char *
2156 test_dir_matches_p()
2157 {
2158   int i;
2159   struct {
2160     char *dirlist[3];
2161     char *dir;
2162     bool result;
2163   } test_array[] = {
2164     { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2165     { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2166     { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2167     { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2168     { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2169   };
2170
2171   for (i = 0; i < countof(test_array); ++i)
2172     {
2173       bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2174
2175       mu_assert ("test_dir_matches_p: wrong result",
2176                  res == test_array[i].result);
2177     }
2178
2179   return NULL;
2180 }
2181
2182 #endif /* TESTING */
2183