sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996-2006 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9 (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software Foundation, Inc.,
  18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <time.h>
  36 #ifdef HAVE_SYS_TIME_H
  37 # include <sys/time.h>
  38 #endif
  39 #ifdef HAVE_UNISTD_H
  40 # include <unistd.h>
  41 #endif
  42 #ifdef HAVE_MMAP
  43 # include <sys/mman.h>
  44 #endif
  45 #ifdef HAVE_UTIME_H
  46 # include <utime.h>
  47 #endif
  48 #ifdef HAVE_SYS_UTIME_H
  49 # include <sys/utime.h>
  50 #endif
  51 #include <errno.h>
  52 #include <fcntl.h>
  53 #include <assert.h>
  54 #include <stdarg.h>
  55 #include <locale.h>
  56
  57 /* For TIOCGWINSZ and friends: */
  58 #ifdef HAVE_SYS_IOCTL_H
  59 # include <sys/ioctl.h>
  60 #endif
  61 #ifdef HAVE_TERMIOS_H
  62 # include <termios.h>
  63 #endif
  64
  65 /* Needed for Unix version of run_with_timeout. */
  66 #include <signal.h>
  67 #include <setjmp.h>
  68
  69 #ifndef HAVE_SIGSETJMP
  70 /* If sigsetjmp is a macro, configure won't pick it up. */
  71 # ifdef sigsetjmp
  72 #  define HAVE_SIGSETJMP
  73 # endif
  74 #endif
  75
  76 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  77 # define USE_SIGNAL_TIMEOUT
  78 #endif
  79
  80 #include "wget.h"
  81 #include "utils.h"
  82 #include "hash.h"
  83
  84 #ifdef TESTING
  85 #include "test.h"
  86 #endif
  87
  88 /* Utility function: like xstrdup(), but also lowercases S.  */
  89
  90 char *
  91 xstrdup_lower (const char *s)
  92 {
  93   char *copy = xstrdup (s);
  94   char *p = copy;
  95   for (; *p; p++)
  96     *p = TOLOWER (*p);
  97   return copy;
  98 }
  99
 100 /* Copy the string formed by two pointers (one on the beginning, other
 101    on the char after the last char) to a new, malloc-ed location.
 102    0-terminate it.  */
 103 char *
 104 strdupdelim (const char *beg, const char *end)
 105 {
 106   char *res = xmalloc (end - beg + 1);
 107   memcpy (res, beg, end - beg);
 108   res[end - beg] = '\0';
 109   return res;
 110 }
 111
 112 /* Parse a string containing comma-separated elements, and return a
 113    vector of char pointers with the elements.  Spaces following the
 114    commas are ignored.  */
 115 char **
 116 sepstring (const char *s)
 117 {
 118   char **res;
 119   const char *p;
 120   int i = 0;
 121
 122   if (!s || !*s)
 123     return NULL;
 124   res = NULL;
 125   p = s;
 126   while (*s)
 127     {
 128       if (*s == ',')
 129         {
 130           res = xrealloc (res, (i + 2) * sizeof (char *));
 131           res[i] = strdupdelim (p, s);
 132           res[++i] = NULL;
 133           ++s;
 134           /* Skip the blanks following the ','.  */
 135           while (ISSPACE (*s))
 136             ++s;
 137           p = s;
 138         }
 139       else
 140         ++s;
 141     }
 142   res = xrealloc (res, (i + 2) * sizeof (char *));
 143   res[i] = strdupdelim (p, s);
 144   res[i + 1] = NULL;
 145   return res;
 146 }
 147 \f
 148 /* Like sprintf, but prints into a string of sufficient size freshly
 149    allocated with malloc, which is returned.  If unable to print due
 150    to invalid format, returns NULL.  Inability to allocate needed
 151    memory results in abort, as with xmalloc.  This is in spirit
 152    similar to the GNU/BSD extension asprintf, but somewhat easier to
 153    use.
 154
 155    Internally the function either calls vasprintf or loops around
 156    vsnprintf until the correct size is found.  Since Wget also ships a
 157    fallback implementation of vsnprintf, this should be portable.  */
 158
 159 char *
 160 aprintf (const char *fmt, ...)
 161 {
 162 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 163   /* Use vasprintf. */
 164   int ret;
 165   va_list args;
 166   char *str;
 167   va_start (args, fmt);
 168   ret = vasprintf (&str, fmt, args);
 169   va_end (args);
 170   if (ret < 0 && errno == ENOMEM)
 171     abort ();                   /* for consistency with xmalloc/xrealloc */
 172   else if (ret < 0)
 173     return NULL;
 174   return str;
 175 #else  /* not HAVE_VASPRINTF */
 176
 177   /* vasprintf is unavailable.  snprintf into a small buffer and
 178      resize it as necessary. */
 179   int size = 32;
 180   char *str = xmalloc (size);
 181
 182   /* #### This code will infloop and eventually abort in xrealloc if
 183      passed a FMT that causes snprintf to consistently return -1.  */
 184
 185   while (1)
 186     {
 187       int n;
 188       va_list args;
 189
 190       va_start (args, fmt);
 191       n = vsnprintf (str, size, fmt, args);
 192       va_end (args);
 193
 194       /* If the printing worked, return the string. */
 195       if (n > -1 && n < size)
 196         return str;
 197
 198       /* Else try again with a larger buffer. */
 199       if (n > -1)               /* C99 */
 200         size = n + 1;           /* precisely what is needed */
 201       else
 202         size <<= 1;             /* twice the old size */
 203       str = xrealloc (str, size);
 204     }
 205 #endif /* not HAVE_VASPRINTF */
 206 }
 207
 208 /* Concatenate the NULL-terminated list of string arguments into
 209    freshly allocated space.  */
 210
 211 char *
 212 concat_strings (const char *str0, ...)
 213 {
 214   va_list args;
 215   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 216   char *ret, *p;
 217
 218   const char *next_str;
 219   int total_length = 0;
 220   int argcount;
 221
 222   /* Calculate the length of and allocate the resulting string. */
 223
 224   argcount = 0;
 225   va_start (args, str0);
 226   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 227     {
 228       int len = strlen (next_str);
 229       if (argcount < countof (saved_lengths))
 230         saved_lengths[argcount++] = len;
 231       total_length += len;
 232     }
 233   va_end (args);
 234   p = ret = xmalloc (total_length + 1);
 235
 236   /* Copy the strings into the allocated space. */
 237
 238   argcount = 0;
 239   va_start (args, str0);
 240   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 241     {
 242       int len;
 243       if (argcount < countof (saved_lengths))
 244         len = saved_lengths[argcount++];
 245       else
 246         len = strlen (next_str);
 247       memcpy (p, next_str, len);
 248       p += len;
 249     }
 250   va_end (args);
 251   *p = '\0';
 252
 253   return ret;
 254 }
 255 \f
 256 /* Format the provided time according to the specified format.  The
 257    format is a string with format elements supported by strftime.  */
 258
 259 static char *
 260 fmttime (time_t t, const char *fmt)
 261 {
 262   static char output[32];
 263   struct tm *tm = localtime(&t);
 264   if (!tm)
 265     abort ();
 266   if (!strftime(output, sizeof(output), fmt, tm))
 267     abort ();
 268   return output;
 269 }
 270
 271 /* Return pointer to a static char[] buffer in which zero-terminated
 272    string-representation of TM (in form hh:mm:ss) is printed.
 273
 274    If TM is NULL, the current time will be used.  */
 275
 276 char *
 277 time_str (time_t t)
 278 {
 279   return fmttime(t, "%H:%M:%S");
 280 }
 281
 282 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 283
 284 char *
 285 datetime_str (time_t t)
 286 {
 287   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 288 }
 289 \f
 290 /* The Windows versions of the following two functions are defined in
 291    mswindows.c.  */
 292
 293 #ifndef WINDOWS
 294 void
 295 fork_to_background (void)
 296 {
 297   pid_t pid;
 298   /* Whether we arrange our own version of opt.lfilename here.  */
 299   bool logfile_changed = false;
 300
 301   if (!opt.lfilename)
 302     {
 303       /* We must create the file immediately to avoid either a race
 304          condition (which arises from using unique_name and failing to
 305          use fopen_excl) or lying to the user about the log file name
 306          (which arises from using unique_name, printing the name, and
 307          using fopen_excl later on.)  */
 308       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 309       if (new_log_fp)
 310         {
 311           logfile_changed = true;
 312           fclose (new_log_fp);
 313         }
 314     }
 315   pid = fork ();
 316   if (pid < 0)
 317     {
 318       /* parent, error */
 319       perror ("fork");
 320       exit (1);
 321     }
 322   else if (pid != 0)
 323     {
 324       /* parent, no error */
 325       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 326       if (logfile_changed)
 327         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 328       exit (0);                 /* #### should we use _exit()? */
 329     }
 330
 331   /* child: give up the privileges and keep running. */
 332   setsid ();
 333   freopen ("/dev/null", "r", stdin);
 334   freopen ("/dev/null", "w", stdout);
 335   freopen ("/dev/null", "w", stderr);
 336 }
 337 #endif /* not WINDOWS */
 338 \f
 339 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 340    specified with TM.  The atime ("access time") is set to the current
 341    time.  */
 342
 343 void
 344 touch (const char *file, time_t tm)
 345 {
 346 #ifdef HAVE_STRUCT_UTIMBUF
 347   struct utimbuf times;
 348 #else
 349   struct {
 350     time_t actime;
 351     time_t modtime;
 352   } times;
 353 #endif
 354   times.modtime = tm;
 355   times.actime = time (NULL);
 356   if (utime (file, &times) == -1)
 357     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 358 }
 359
 360 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 361    nothing under MS-Windows.  */
 362 int
 363 remove_link (const char *file)
 364 {
 365   int err = 0;
 366   struct_stat st;
 367
 368   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 369     {
 370       DEBUGP (("Unlinking %s (symlink).\n", file));
 371       err = unlink (file);
 372       if (err != 0)
 373         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 374                    file, strerror (errno));
 375     }
 376   return err;
 377 }
 378
 379 /* Does FILENAME exist?  This is quite a lousy implementation, since
 380    it supplies no error codes -- only a yes-or-no answer.  Thus it
 381    will return that a file does not exist if, e.g., the directory is
 382    unreadable.  I don't mind it too much currently, though.  The
 383    proper way should, of course, be to have a third, error state,
 384    other than true/false, but that would introduce uncalled-for
 385    additional complexity to the callers.  */
 386 bool
 387 file_exists_p (const char *filename)
 388 {
 389 #ifdef HAVE_ACCESS
 390   return access (filename, F_OK) >= 0;
 391 #else
 392   struct_stat buf;
 393   return stat (filename, &buf) >= 0;
 394 #endif
 395 }
 396
 397 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 398    Returns 0 on error.  */
 399 bool
 400 file_non_directory_p (const char *path)
 401 {
 402   struct_stat buf;
 403   /* Use lstat() rather than stat() so that symbolic links pointing to
 404      directories can be identified correctly.  */
 405   if (lstat (path, &buf) != 0)
 406     return false;
 407   return S_ISDIR (buf.st_mode) ? false : true;
 408 }
 409
 410 /* Return the size of file named by FILENAME, or -1 if it cannot be
 411    opened or seeked into. */
 412 wgint
 413 file_size (const char *filename)
 414 {
 415 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 416   wgint size;
 417   /* We use fseek rather than stat to determine the file size because
 418      that way we can also verify that the file is readable without
 419      explicitly checking for permissions.  Inspired by the POST patch
 420      by Arnaud Wylie.  */
 421   FILE *fp = fopen (filename, "rb");
 422   if (!fp)
 423     return -1;
 424   fseeko (fp, 0, SEEK_END);
 425   size = ftello (fp);
 426   fclose (fp);
 427   return size;
 428 #else
 429   struct_stat st;
 430   if (stat (filename, &st) < 0)
 431     return -1;
 432   return st.st_size;
 433 #endif
 434 }
 435
 436 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 437    doesn't exist is found.  Return a freshly allocated copy of the
 438    unused file name.  */
 439
 440 static char *
 441 unique_name_1 (const char *prefix)
 442 {
 443   int count = 1;
 444   int plen = strlen (prefix);
 445   char *template = (char *)alloca (plen + 1 + 24);
 446   char *template_tail = template + plen;
 447
 448   memcpy (template, prefix, plen);
 449   *template_tail++ = '.';
 450
 451   do
 452     number_to_string (template_tail, count++);
 453   while (file_exists_p (template));
 454
 455   return xstrdup (template);
 456 }
 457
 458 /* Return a unique file name, based on FILE.
 459
 460    More precisely, if FILE doesn't exist, it is returned unmodified.
 461    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 462    file name that doesn't exist is returned.
 463
 464    The resulting file is not created, only verified that it didn't
 465    exist at the point in time when the function was called.
 466    Therefore, where security matters, don't rely that the file created
 467    by this function exists until you open it with O_EXCL or
 468    equivalent.
 469
 470    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 471    string.  Otherwise, it may return FILE if the file doesn't exist
 472    (and therefore doesn't need changing).  */
 473
 474 char *
 475 unique_name (const char *file, bool allow_passthrough)
 476 {
 477   /* If the FILE itself doesn't exist, return it without
 478      modification. */
 479   if (!file_exists_p (file))
 480     return allow_passthrough ? (char *)file : xstrdup (file);
 481
 482   /* Otherwise, find a numeric suffix that results in unused file name
 483      and return it.  */
 484   return unique_name_1 (file);
 485 }
 486
 487 /* Create a file based on NAME, except without overwriting an existing
 488    file with that name.  Providing O_EXCL is correctly implemented,
 489    this function does not have the race condition associated with
 490    opening the file returned by unique_name.  */
 491
 492 FILE *
 493 unique_create (const char *name, bool binary, char **opened_name)
 494 {
 495   /* unique file name, based on NAME */
 496   char *uname = unique_name (name, false);
 497   FILE *fp;
 498   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 499     {
 500       xfree (uname);
 501       uname = unique_name (name, false);
 502     }
 503   if (opened_name && fp != NULL)
 504     {
 505       if (fp)
 506         *opened_name = uname;
 507       else
 508         {
 509           *opened_name = NULL;
 510           xfree (uname);
 511         }
 512     }
 513   else
 514     xfree (uname);
 515   return fp;
 516 }
 517
 518 /* Open the file for writing, with the addition that the file is
 519    opened "exclusively".  This means that, if the file already exists,
 520    this function will *fail* and errno will be set to EEXIST.  If
 521    BINARY is set, the file will be opened in binary mode, equivalent
 522    to fopen's "wb".
 523
 524    If opening the file fails for any reason, including the file having
 525    previously existed, this function returns NULL and sets errno
 526    appropriately.  */
 527
 528 FILE *
 529 fopen_excl (const char *fname, bool binary)
 530 {
 531   int fd;
 532 #ifdef O_EXCL
 533   int flags = O_WRONLY | O_CREAT | O_EXCL;
 534 # ifdef O_BINARY
 535   if (binary)
 536     flags |= O_BINARY;
 537 # endif
 538   fd = open (fname, flags, 0666);
 539   if (fd < 0)
 540     return NULL;
 541   return fdopen (fd, binary ? "wb" : "w");
 542 #else  /* not O_EXCL */
 543   /* Manually check whether the file exists.  This is prone to race
 544      conditions, but systems without O_EXCL haven't deserved
 545      better.  */
 546   if (file_exists_p (fname))
 547     {
 548       errno = EEXIST;
 549       return NULL;
 550     }
 551   return fopen (fname, binary ? "wb" : "w");
 552 #endif /* not O_EXCL */
 553 }
 554 \f
 555 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 556    are missing, create them first.  In case any mkdir() call fails,
 557    return its error status.  Returns 0 on successful completion.
 558
 559    The behaviour of this function should be identical to the behaviour
 560    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 561 int
 562 make_directory (const char *directory)
 563 {
 564   int i, ret, quit = 0;
 565   char *dir;
 566
 567   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 568      function is unsafe if called with a read-only char *argument.  */
 569   STRDUP_ALLOCA (dir, directory);
 570
 571   /* If the first character of dir is '/', skip it (and thus enable
 572      creation of absolute-pathname directories.  */
 573   for (i = (*dir == '/'); 1; ++i)
 574     {
 575       for (; dir[i] && dir[i] != '/'; i++)
 576         ;
 577       if (!dir[i])
 578         quit = 1;
 579       dir[i] = '\0';
 580       /* Check whether the directory already exists.  Allow creation of
 581          of intermediate directories to fail, as the initial path components
 582          are not necessarily directories!  */
 583       if (!file_exists_p (dir))
 584         ret = mkdir (dir, 0777);
 585       else
 586         ret = 0;
 587       if (quit)
 588         break;
 589       else
 590         dir[i] = '/';
 591     }
 592   return ret;
 593 }
 594
 595 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 596    should be a file name.
 597
 598    file_merge("/foo/bar", "baz")  => "/foo/baz"
 599    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 600    file_merge("foo", "bar")       => "bar"
 601
 602    In other words, it's a simpler and gentler version of uri_merge.  */
 603
 604 char *
 605 file_merge (const char *base, const char *file)
 606 {
 607   char *result;
 608   const char *cut = (const char *)strrchr (base, '/');
 609
 610   if (!cut)
 611     return xstrdup (file);
 612
 613   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 614   memcpy (result, base, cut - base);
 615   result[cut - base] = '/';
 616   strcpy (result + (cut - base) + 1, file);
 617
 618   return result;
 619 }
 620 \f
 621 /* Like fnmatch, but performs a case-insensitive match.  */
 622
 623 int
 624 fnmatch_nocase (const char *pattern, const char *string, int flags)
 625 {
 626 #ifdef FNM_CASEFOLD
 627   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 628      also present on *BSD platforms, and possibly elsewhere.  */
 629   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 630 #else
 631   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 632   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 633   char *strcopy = (char *) alloca (strlen (string) + 1);
 634   char *p;
 635   for (p = patcopy; *pattern; pattern++, p++)
 636     *p = TOLOWER (*pattern);
 637   *p = '\0';
 638   for (p = strcopy; *string; string++, p++)
 639     *p = TOLOWER (*string);
 640   *p = '\0';
 641   return fnmatch (patcopy, strcopy, flags);
 642 #endif
 643 }
 644
 645 static bool in_acclist (const char *const *, const char *, bool);
 646
 647 /* Determine whether a file is acceptable to be followed, according to
 648    lists of patterns to accept/reject.  */
 649 bool
 650 acceptable (const char *s)
 651 {
 652   int l = strlen (s);
 653
 654   while (l && s[l] != '/')
 655     --l;
 656   if (s[l] == '/')
 657     s += (l + 1);
 658   if (opt.accepts)
 659     {
 660       if (opt.rejects)
 661         return (in_acclist ((const char *const *)opt.accepts, s, true)
 662                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 663       else
 664         return in_acclist ((const char *const *)opt.accepts, s, true);
 665     }
 666   else if (opt.rejects)
 667     return !in_acclist ((const char *const *)opt.rejects, s, true);
 668   return true;
 669 }
 670
 671 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 672    will return true if and only if D2 begins with `/something/' or is exactly
 673    '/something'.  */
 674 bool
 675 subdir_p (const char *d1, const char *d2)
 676 {
 677   if (!opt.ignore_case)
 678     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 679       ;
 680   else
 681     for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
 682       ;
 683
 684   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 685 }
 686
 687 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 688    first element that matches DIR, through wildcards or front comparison (as
 689    appropriate).  */
 690 static bool
 691 dir_matches_p (char **dirlist, const char *dir)
 692 {
 693   char **x;
 694   int (*matcher) (const char *, const char *, int)
 695     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 696
 697   for (x = dirlist; *x; x++)
 698     {
 699       /* Remove leading '/' */
 700       char *p = *x + (**x == '/');
 701       if (has_wildcards_p (p))
 702         {
 703           if (matcher (p, dir, FNM_PATHNAME) == 0)
 704             break;
 705         }
 706       else
 707         {
 708           if (subdir_p (p, dir))
 709             break;
 710         }
 711     }
 712
 713   return *x ? true : false;
 714 }
 715
 716 /* Returns whether DIRECTORY is acceptable for download, wrt the
 717    include/exclude lists.
 718
 719    The leading `/' is ignored in paths; relative and absolute paths
 720    may be freely intermixed.  */
 721
 722 bool
 723 accdir (const char *directory)
 724 {
 725   /* Remove starting '/'.  */
 726   if (*directory == '/')
 727     ++directory;
 728   if (opt.includes)
 729     {
 730       if (!dir_matches_p (opt.includes, directory))
 731         return false;
 732     }
 733   if (opt.excludes)
 734     {
 735       if (dir_matches_p (opt.excludes, directory))
 736         return false;
 737     }
 738   return true;
 739 }
 740
 741 /* Return true if STRING ends with TAIL.  For instance:
 742
 743    match_tail ("abc", "bc", false)  -> 1
 744    match_tail ("abc", "ab", false)  -> 0
 745    match_tail ("abc", "abc", false) -> 1
 746
 747    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 748
 749 bool
 750 match_tail (const char *string, const char *tail, bool fold_case)
 751 {
 752   int i, j;
 753
 754   /* We want this to be fast, so we code two loops, one with
 755      case-folding, one without. */
 756
 757   if (!fold_case)
 758     {
 759       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 760         if (string[i] != tail[j])
 761           break;
 762     }
 763   else
 764     {
 765       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 766         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 767           break;
 768     }
 769
 770   /* If the tail was exhausted, the match was succesful.  */
 771   if (j == -1)
 772     return true;
 773   else
 774     return false;
 775 }
 776
 777 /* Checks whether string S matches each element of ACCEPTS.  A list
 778    element are matched either with fnmatch() or match_tail(),
 779    according to whether the element contains wildcards or not.
 780
 781    If the BACKWARD is false, don't do backward comparison -- just compare
 782    them normally.  */
 783 static bool
 784 in_acclist (const char *const *accepts, const char *s, bool backward)
 785 {
 786   for (; *accepts; accepts++)
 787     {
 788       if (has_wildcards_p (*accepts))
 789         {
 790           int res = opt.ignore_case
 791             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 792           /* fnmatch returns 0 if the pattern *does* match the string.  */
 793           if (res == 0)
 794             return true;
 795         }
 796       else
 797         {
 798           if (backward)
 799             {
 800               if (match_tail (s, *accepts, opt.ignore_case))
 801                 return true;
 802             }
 803           else
 804             {
 805               int cmp = opt.ignore_case
 806                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 807               if (cmp == 0)
 808                 return true;
 809             }
 810         }
 811     }
 812   return false;
 813 }
 814
 815 /* Return the location of STR's suffix (file extension).  Examples:
 816    suffix ("foo.bar")       -> "bar"
 817    suffix ("foo.bar.baz")   -> "baz"
 818    suffix ("/foo/bar")      -> NULL
 819    suffix ("/foo.bar/baz")  -> NULL  */
 820 char *
 821 suffix (const char *str)
 822 {
 823   int i;
 824
 825   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 826     ;
 827
 828   if (str[i++] == '.')
 829     return (char *)str + i;
 830   else
 831     return NULL;
 832 }
 833
 834 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 835    `]').  */
 836
 837 bool
 838 has_wildcards_p (const char *s)
 839 {
 840   for (; *s; s++)
 841     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 842       return true;
 843   return false;
 844 }
 845
 846 /* Return true if FNAME ends with a typical HTML suffix.  The
 847    following (case-insensitive) suffixes are presumed to be HTML
 848    files:
 849
 850      html
 851      htm
 852      ?html (`?' matches one character)
 853
 854    #### CAVEAT.  This is not necessarily a good indication that FNAME
 855    refers to a file that contains HTML!  */
 856 bool
 857 has_html_suffix_p (const char *fname)
 858 {
 859   char *suf;
 860
 861   if ((suf = suffix (fname)) == NULL)
 862     return false;
 863   if (!strcasecmp (suf, "html"))
 864     return true;
 865   if (!strcasecmp (suf, "htm"))
 866     return true;
 867   if (suf[0] && !strcasecmp (suf + 1, "html"))
 868     return true;
 869   return false;
 870 }
 871
 872 /* Read a line from FP and return the pointer to freshly allocated
 873    storage.  The storage space is obtained through malloc() and should
 874    be freed with free() when it is no longer needed.
 875
 876    The length of the line is not limited, except by available memory.
 877    The newline character at the end of line is retained.  The line is
 878    terminated with a zero character.
 879
 880    After end-of-file is encountered without anything being read, NULL
 881    is returned.  NULL is also returned on error.  To distinguish
 882    between these two cases, use the stdio function ferror().  */
 883
 884 char *
 885 read_whole_line (FILE *fp)
 886 {
 887   int length = 0;
 888   int bufsize = 82;
 889   char *line = xmalloc (bufsize);
 890
 891   while (fgets (line + length, bufsize - length, fp))
 892     {
 893       length += strlen (line + length);
 894       if (length == 0)
 895         /* Possible for example when reading from a binary file where
 896            a line begins with \0.  */
 897         continue;
 898
 899       if (line[length - 1] == '\n')
 900         break;
 901
 902       /* fgets() guarantees to read the whole line, or to use up the
 903          space we've given it.  We can double the buffer
 904          unconditionally.  */
 905       bufsize <<= 1;
 906       line = xrealloc (line, bufsize);
 907     }
 908   if (length == 0 || ferror (fp))
 909     {
 910       xfree (line);
 911       return NULL;
 912     }
 913   if (length + 1 < bufsize)
 914     /* Relieve the memory from our exponential greediness.  We say
 915        `length + 1' because the terminating \0 is not included in
 916        LENGTH.  We don't need to zero-terminate the string ourselves,
 917        though, because fgets() does that.  */
 918     line = xrealloc (line, length + 1);
 919   return line;
 920 }
 921 \f
 922 /* Read FILE into memory.  A pointer to `struct file_memory' are
 923    returned; use struct element `content' to access file contents, and
 924    the element `length' to know the file length.  `content' is *not*
 925    zero-terminated, and you should *not* read or write beyond the [0,
 926    length) range of characters.
 927
 928    After you are done with the file contents, call read_file_free to
 929    release the memory.
 930
 931    Depending on the operating system and the type of file that is
 932    being read, read_file() either mmap's the file into memory, or
 933    reads the file into the core using read().
 934
 935    If file is named "-", fileno(stdin) is used for reading instead.
 936    If you want to read from a real file named "-", use "./-" instead.  */
 937
 938 struct file_memory *
 939 read_file (const char *file)
 940 {
 941   int fd;
 942   struct file_memory *fm;
 943   long size;
 944   bool inhibit_close = false;
 945
 946   /* Some magic in the finest tradition of Perl and its kin: if FILE
 947      is "-", just use stdin.  */
 948   if (HYPHENP (file))
 949     {
 950       fd = fileno (stdin);
 951       inhibit_close = true;
 952       /* Note that we don't inhibit mmap() in this case.  If stdin is
 953          redirected from a regular file, mmap() will still work.  */
 954     }
 955   else
 956     fd = open (file, O_RDONLY);
 957   if (fd < 0)
 958     return NULL;
 959   fm = xnew (struct file_memory);
 960
 961 #ifdef HAVE_MMAP
 962   {
 963     struct_fstat buf;
 964     if (fstat (fd, &buf) < 0)
 965       goto mmap_lose;
 966     fm->length = buf.st_size;
 967     /* NOTE: As far as I know, the callers of this function never
 968        modify the file text.  Relying on this would enable us to
 969        specify PROT_READ and MAP_SHARED for a marginal gain in
 970        efficiency, but at some cost to generality.  */
 971     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 972                         MAP_PRIVATE, fd, 0);
 973     if (fm->content == (char *)MAP_FAILED)
 974       goto mmap_lose;
 975     if (!inhibit_close)
 976       close (fd);
 977
 978     fm->mmap_p = 1;
 979     return fm;
 980   }
 981
 982  mmap_lose:
 983   /* The most common reason why mmap() fails is that FD does not point
 984      to a plain file.  However, it's also possible that mmap() doesn't
 985      work for a particular type of file.  Therefore, whenever mmap()
 986      fails, we just fall back to the regular method.  */
 987 #endif /* HAVE_MMAP */
 988
 989   fm->length = 0;
 990   size = 512;                   /* number of bytes fm->contents can
 991                                    hold at any given time. */
 992   fm->content = xmalloc (size);
 993   while (1)
 994     {
 995       wgint nread;
 996       if (fm->length > size / 2)
 997         {
 998           /* #### I'm not sure whether the whole exponential-growth
 999              thing makes sense with kernel read.  On Linux at least,
1000              read() refuses to read more than 4K from a file at a
1001              single chunk anyway.  But other Unixes might optimize it
1002              better, and it doesn't *hurt* anything, so I'm leaving
1003              it.  */
1004
1005           /* Normally, we grow SIZE exponentially to make the number
1006              of calls to read() and realloc() logarithmic in relation
1007              to file size.  However, read() can read an amount of data
1008              smaller than requested, and it would be unreasonable to
1009              double SIZE every time *something* was read.  Therefore,
1010              we double SIZE only when the length exceeds half of the
1011              entire allocated size.  */
1012           size <<= 1;
1013           fm->content = xrealloc (fm->content, size);
1014         }
1015       nread = read (fd, fm->content + fm->length, size - fm->length);
1016       if (nread > 0)
1017         /* Successful read. */
1018         fm->length += nread;
1019       else if (nread < 0)
1020         /* Error. */
1021         goto lose;
1022       else
1023         /* EOF */
1024         break;
1025     }
1026   if (!inhibit_close)
1027     close (fd);
1028   if (size > fm->length && fm->length != 0)
1029     /* Due to exponential growth of fm->content, the allocated region
1030        might be much larger than what is actually needed.  */
1031     fm->content = xrealloc (fm->content, fm->length);
1032   fm->mmap_p = 0;
1033   return fm;
1034
1035  lose:
1036   if (!inhibit_close)
1037     close (fd);
1038   xfree (fm->content);
1039   xfree (fm);
1040   return NULL;
1041 }
1042
1043 /* Release the resources held by FM.  Specifically, this calls
1044    munmap() or xfree() on fm->content, depending whether mmap or
1045    malloc/read were used to read in the file.  It also frees the
1046    memory needed to hold the FM structure itself.  */
1047
1048 void
1049 read_file_free (struct file_memory *fm)
1050 {
1051 #ifdef HAVE_MMAP
1052   if (fm->mmap_p)
1053     {
1054       munmap (fm->content, fm->length);
1055     }
1056   else
1057 #endif
1058     {
1059       xfree (fm->content);
1060     }
1061   xfree (fm);
1062 }
1063 \f
1064 /* Free the pointers in a NULL-terminated vector of pointers, then
1065    free the pointer itself.  */
1066 void
1067 free_vec (char **vec)
1068 {
1069   if (vec)
1070     {
1071       char **p = vec;
1072       while (*p)
1073         xfree (*p++);
1074       xfree (vec);
1075     }
1076 }
1077
1078 /* Append vector V2 to vector V1.  The function frees V2 and
1079    reallocates V1 (thus you may not use the contents of neither
1080    pointer after the call).  If V1 is NULL, V2 is returned.  */
1081 char **
1082 merge_vecs (char **v1, char **v2)
1083 {
1084   int i, j;
1085
1086   if (!v1)
1087     return v2;
1088   if (!v2)
1089     return v1;
1090   if (!*v2)
1091     {
1092       /* To avoid j == 0 */
1093       xfree (v2);
1094       return v1;
1095     }
1096   /* Count v1.  */
1097   for (i = 0; v1[i]; i++)
1098     ;
1099   /* Count v2.  */
1100   for (j = 0; v2[j]; j++)
1101     ;
1102   /* Reallocate v1.  */
1103   v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1104   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1105   xfree (v2);
1106   return v1;
1107 }
1108
1109 /* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1110    is allocated as needed.  Return the new value of the vector. */
1111
1112 char **
1113 vec_append (char **vec, const char *str)
1114 {
1115   int cnt;                      /* count of vector elements, including
1116                                    the one we're about to append */
1117   if (vec != NULL)
1118     {
1119       for (cnt = 0; vec[cnt]; cnt++)
1120         ;
1121       ++cnt;
1122     }
1123   else
1124     cnt = 1;
1125   /* Reallocate the array to fit the new element and the NULL. */
1126   vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1127   /* Append a copy of STR to the vector. */
1128   vec[cnt - 1] = xstrdup (str);
1129   vec[cnt] = NULL;
1130   return vec;
1131 }
1132 \f
1133 /* Sometimes it's useful to create "sets" of strings, i.e. special
1134    hash tables where you want to store strings as keys and merely
1135    query for their existence.  Here is a set of utility routines that
1136    makes that transparent.  */
1137
1138 void
1139 string_set_add (struct hash_table *ht, const char *s)
1140 {
1141   /* First check whether the set element already exists.  If it does,
1142      do nothing so that we don't have to free() the old element and
1143      then strdup() a new one.  */
1144   if (hash_table_contains (ht, s))
1145     return;
1146
1147   /* We use "1" as value.  It provides us a useful and clear arbitrary
1148      value, and it consumes no memory -- the pointers to the same
1149      string "1" will be shared by all the key-value pairs in all `set'
1150      hash tables.  */
1151   hash_table_put (ht, xstrdup (s), "1");
1152 }
1153
1154 /* Synonym for hash_table_contains... */
1155
1156 int
1157 string_set_contains (struct hash_table *ht, const char *s)
1158 {
1159   return hash_table_contains (ht, s);
1160 }
1161
1162 /* Convert the specified string set to array.  ARRAY should be large
1163    enough to hold hash_table_count(ht) char pointers.  */
1164
1165 void string_set_to_array (struct hash_table *ht, char **array)
1166 {
1167   hash_table_iterator iter;
1168   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1169     *array++ = iter.key;
1170 }
1171
1172 /* Free the string set.  This frees both the storage allocated for
1173    keys and the actual hash table.  (hash_table_destroy would only
1174    destroy the hash table.)  */
1175
1176 void
1177 string_set_free (struct hash_table *ht)
1178 {
1179   hash_table_iterator iter;
1180   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1181     xfree (iter.key);
1182   hash_table_destroy (ht);
1183 }
1184
1185 /* Utility function: simply call xfree() on all keys and values of HT.  */
1186
1187 void
1188 free_keys_and_values (struct hash_table *ht)
1189 {
1190   hash_table_iterator iter;
1191   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1192     {
1193       xfree (iter.key);
1194       xfree (iter.value);
1195     }
1196 }
1197 \f
1198 /* Get digit grouping data for thousand separors by calling
1199    localeconv().  The data includes separator string and grouping info
1200    and is cached after the first call to the function.
1201
1202    In locales that don't set a thousand separator (such as the "C"
1203    locale), this forces it to be ",".  We are now only showing
1204    thousand separators in one place, so this shouldn't be a problem in
1205    practice.  */
1206
1207 static void
1208 get_grouping_data (const char **sep, const char **grouping)
1209 {
1210   static const char *cached_sep;
1211   static const char *cached_grouping;
1212   static bool initialized;
1213   if (!initialized)
1214     {
1215       /* Get the grouping info from the locale. */
1216       struct lconv *lconv = localeconv ();
1217       cached_sep = lconv->thousands_sep;
1218       cached_grouping = lconv->grouping;
1219       if (!*cached_sep)
1220         {
1221           /* Many locales (such as "C" or "hr_HR") don't specify
1222              grouping, which we still want to use it for legibility.
1223              In those locales set the sep char to ',', unless that
1224              character is used for decimal point, in which case set it
1225              to ".".  */
1226           if (*lconv->decimal_point != ',')
1227             cached_sep = ",";
1228           else
1229             cached_sep = ".";
1230           cached_grouping = "\x03";
1231         }
1232       initialized = true;
1233     }
1234   *sep = cached_sep;
1235   *grouping = cached_grouping;
1236 }
1237
1238 /* Return a printed representation of N with thousand separators.
1239    This should respect locale settings, with the exception of the "C"
1240    locale which mandates no separator, but we use one anyway.
1241
1242    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1243    the separators because it's too non-portable, and it's hard to test
1244    for this feature at configure time.  Besides, it wouldn't display
1245    separators in the "C" locale, still used by many Unix users.  */
1246
1247 const char *
1248 with_thousand_seps (wgint n)
1249 {
1250   static char outbuf[48];
1251   char *p = outbuf + sizeof outbuf;
1252
1253   /* Info received from locale */
1254   const char *grouping, *sep;
1255   int seplen;
1256
1257   /* State information */
1258   int i = 0, groupsize;
1259   const char *atgroup;
1260
1261   bool negative = n < 0;
1262
1263   /* Initialize grouping data. */
1264   get_grouping_data (&sep, &grouping);
1265   seplen = strlen (sep);
1266   atgroup = grouping;
1267   groupsize = *atgroup++;
1268
1269   /* This would overflow on WGINT_MIN, but printing negative numbers
1270      is not an important goal of this fuinction.  */
1271   if (negative)
1272     n = -n;
1273
1274   /* Write the number into the buffer, backwards, inserting the
1275      separators as necessary.  */
1276   *--p = '\0';
1277   while (1)
1278     {
1279       *--p = n % 10 + '0';
1280       n /= 10;
1281       if (n == 0)
1282         break;
1283       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1284       if (++i == groupsize)
1285         {
1286           if (seplen == 1)
1287             *--p = *sep;
1288           else
1289             memcpy (p -= seplen, sep, seplen);
1290           i = 0;
1291           if (*atgroup)
1292             groupsize = *atgroup++;
1293         }
1294     }
1295   if (negative)
1296     *--p = '-';
1297
1298   return p;
1299 }
1300
1301 /* N, a byte quantity, is converted to a human-readable abberviated
1302    form a la sizes printed by `ls -lh'.  The result is written to a
1303    static buffer, a pointer to which is returned.
1304
1305    Unlike `with_thousand_seps', this approximates to the nearest unit.
1306    Quoting GNU libit: "Most people visually process strings of 3-4
1307    digits effectively, but longer strings of digits are more prone to
1308    misinterpretation.  Hence, converting to an abbreviated form
1309    usually improves readability."
1310
1311    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1312    original computer-related meaning of "powers of 1024".  We don't
1313    use the "*bibyte" names invented in 1998, and seldom used in
1314    practice.  Wikipedia's entry on "binary prefix" discusses this in
1315    some detail.  */
1316
1317 char *
1318 human_readable (HR_NUMTYPE n)
1319 {
1320   /* These suffixes are compatible with those of GNU `ls -lh'. */
1321   static char powers[] =
1322     {
1323       'K',                      /* kilobyte, 2^10 bytes */
1324       'M',                      /* megabyte, 2^20 bytes */
1325       'G',                      /* gigabyte, 2^30 bytes */
1326       'T',                      /* terabyte, 2^40 bytes */
1327       'P',                      /* petabyte, 2^50 bytes */
1328       'E',                      /* exabyte,  2^60 bytes */
1329     };
1330   static char buf[8];
1331   int i;
1332
1333   /* If the quantity is smaller than 1K, just print it. */
1334   if (n < 1024)
1335     {
1336       snprintf (buf, sizeof (buf), "%d", (int) n);
1337       return buf;
1338     }
1339
1340   /* Loop over powers, dividing N with 1024 in each iteration.  This
1341      works unchanged for all sizes of wgint, while still avoiding
1342      non-portable `long double' arithmetic.  */
1343   for (i = 0; i < countof (powers); i++)
1344     {
1345       /* At each iteration N is greater than the *subsequent* power.
1346          That way N/1024.0 produces a decimal number in the units of
1347          *this* power.  */
1348       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1349         {
1350           double val = n / 1024.0;
1351           /* Print values smaller than 10 with one decimal digits, and
1352              others without any decimals.  */
1353           snprintf (buf, sizeof (buf), "%.*f%c",
1354                     val < 10 ? 1 : 0, val, powers[i]);
1355           return buf;
1356         }
1357       n /= 1024;
1358     }
1359   return NULL;                  /* unreached */
1360 }
1361
1362 /* Count the digits in the provided number.  Used to allocate space
1363    when printing numbers.  */
1364
1365 int
1366 numdigit (wgint number)
1367 {
1368   int cnt = 1;
1369   if (number < 0)
1370     ++cnt;                      /* accomodate '-' */
1371   while ((number /= 10) != 0)
1372     ++cnt;
1373   return cnt;
1374 }
1375
1376 #define PR(mask) *p++ = n / (mask) + '0'
1377
1378 /* DIGITS_<D> is used to print a D-digit number and should be called
1379    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1380    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1381    Recursively this continues until DIGITS_1 is invoked.  */
1382
1383 #define DIGITS_1(mask) PR (mask)
1384 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1385 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1386 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1387 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1388 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1389 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1390 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1391 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1392 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1393
1394 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1395
1396 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1397 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1398 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1399 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1400 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1401 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1402 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1403 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1404 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1405
1406 /* Shorthand for casting to wgint. */
1407 #define W wgint
1408
1409 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1410    `sprintf(buffer, "%lld", (long long) number)', only typically much
1411    faster and portable to machines without long long.
1412
1413    The speedup may make a difference in programs that frequently
1414    convert numbers to strings.  Some implementations of sprintf,
1415    particularly the one in some versions of GNU libc, have been known
1416    to be quite slow when converting integers to strings.
1417
1418    Return the pointer to the location where the terminating zero was
1419    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1420    function is done.)
1421
1422    BUFFER should be large enough to accept as many bytes as you expect
1423    the number to take up.  On machines with 64-bit wgints the maximum
1424    needed size is 24 bytes.  That includes the digits needed for the
1425    largest 64-bit number, the `-' sign in case it's negative, and the
1426    terminating '\0'.  */
1427
1428 char *
1429 number_to_string (char *buffer, wgint number)
1430 {
1431   char *p = buffer;
1432   wgint n = number;
1433
1434   int last_digit_char = 0;
1435
1436 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1437   /* We are running in a very strange environment.  Leave the correct
1438      printing to sprintf.  */
1439   p += sprintf (buf, "%j", (intmax_t) (n));
1440 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1441
1442   if (n < 0)
1443     {
1444       if (n < -WGINT_MAX)
1445         {
1446           /* n = -n would overflow because -n would evaluate to a
1447              wgint value larger than WGINT_MAX.  Need to make n
1448              smaller and handle the last digit separately.  */
1449           int last_digit = n % 10;
1450           /* The sign of n%10 is implementation-defined. */
1451           if (last_digit < 0)
1452             last_digit_char = '0' - last_digit;
1453           else
1454             last_digit_char = '0' + last_digit;
1455           /* After n is made smaller, -n will not overflow. */
1456           n /= 10;
1457         }
1458
1459       *p++ = '-';
1460       n = -n;
1461     }
1462
1463   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1464      way printing any N is fully open-coded without a loop or jump.
1465      (Also see description of DIGITS_*.)  */
1466
1467   if      (n < 10)                       DIGITS_1 (1);
1468   else if (n < 100)                      DIGITS_2 (10);
1469   else if (n < 1000)                     DIGITS_3 (100);
1470   else if (n < 10000)                    DIGITS_4 (1000);
1471   else if (n < 100000)                   DIGITS_5 (10000);
1472   else if (n < 1000000)                  DIGITS_6 (100000);
1473   else if (n < 10000000)                 DIGITS_7 (1000000);
1474   else if (n < 100000000)                DIGITS_8 (10000000);
1475   else if (n < 1000000000)               DIGITS_9 (100000000);
1476 #if SIZEOF_WGINT == 4
1477   /* wgint is 32 bits wide: no number has more than 10 digits. */
1478   else                                   DIGITS_10 (1000000000);
1479 #else
1480   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1481      Constants are constructed by compile-time multiplication to avoid
1482      dealing with different notations for 64-bit constants
1483      (nL/nLL/nI64, depending on the compiler and architecture).  */
1484   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1485   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1486   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1487   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1488   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1489   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1490   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1491   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1492   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1493   else                                   DIGITS_19 (1000000000*(W)1000000000);
1494 #endif
1495
1496   if (last_digit_char)
1497     *p++ = last_digit_char;
1498
1499   *p = '\0';
1500 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1501
1502   return p;
1503 }
1504
1505 #undef PR
1506 #undef W
1507 #undef SPRINTF_WGINT
1508 #undef DIGITS_1
1509 #undef DIGITS_2
1510 #undef DIGITS_3
1511 #undef DIGITS_4
1512 #undef DIGITS_5
1513 #undef DIGITS_6
1514 #undef DIGITS_7
1515 #undef DIGITS_8
1516 #undef DIGITS_9
1517 #undef DIGITS_10
1518 #undef DIGITS_11
1519 #undef DIGITS_12
1520 #undef DIGITS_13
1521 #undef DIGITS_14
1522 #undef DIGITS_15
1523 #undef DIGITS_16
1524 #undef DIGITS_17
1525 #undef DIGITS_18
1526 #undef DIGITS_19
1527
1528 #define RING_SIZE 3
1529
1530 /* Print NUMBER to a statically allocated string and return a pointer
1531    to the printed representation.
1532
1533    This function is intended to be used in conjunction with printf.
1534    It is hard to portably print wgint values:
1535     a) you cannot use printf("%ld", number) because wgint can be long
1536        long on 32-bit machines with LFS.
1537     b) you cannot use printf("%lld", number) because NUMBER could be
1538        long on 32-bit machines without LFS, or on 64-bit machines,
1539        which do not require LFS.  Also, Windows doesn't support %lld.
1540     c) you cannot use printf("%j", (int_max_t) number) because not all
1541        versions of printf support "%j", the most notable being the one
1542        on Windows.
1543     d) you cannot #define WGINT_FMT to the appropriate format and use
1544        printf(WGINT_FMT, number) because that would break translations
1545        for user-visible messages, such as printf("Downloaded: %d
1546        bytes\n", number).
1547
1548    What you should use instead is printf("%s", number_to_static_string
1549    (number)).
1550
1551    CAVEAT: since the function returns pointers to static data, you
1552    must be careful to copy its result before calling it again.
1553    However, to make it more useful with printf, the function maintains
1554    an internal ring of static buffers to return.  That way things like
1555    printf("%s %s", number_to_static_string (num1),
1556    number_to_static_string (num2)) work as expected.  Three buffers
1557    are currently used, which means that "%s %s %s" will work, but "%s
1558    %s %s %s" won't.  If you need to print more than three wgints,
1559    bump the RING_SIZE (or rethink your message.)  */
1560
1561 char *
1562 number_to_static_string (wgint number)
1563 {
1564   static char ring[RING_SIZE][24];
1565   static int ringpos;
1566   char *buf = ring[ringpos];
1567   number_to_string (buf, number);
1568   ringpos = (ringpos + 1) % RING_SIZE;
1569   return buf;
1570 }
1571 \f
1572 /* Determine the width of the terminal we're running on.  If that's
1573    not possible, return 0.  */
1574
1575 int
1576 determine_screen_width (void)
1577 {
1578   /* If there's a way to get the terminal size using POSIX
1579      tcgetattr(), somebody please tell me.  */
1580 #ifdef TIOCGWINSZ
1581   int fd;
1582   struct winsize wsz;
1583
1584   if (opt.lfilename != NULL)
1585     return 0;
1586
1587   fd = fileno (stderr);
1588   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1589     return 0;                   /* most likely ENOTTY */
1590
1591   return wsz.ws_col;
1592 #elif defined(WINDOWS)
1593   CONSOLE_SCREEN_BUFFER_INFO csbi;
1594   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1595     return 0;
1596   return csbi.dwSize.X;
1597 #else  /* neither TIOCGWINSZ nor WINDOWS */
1598   return 0;
1599 #endif /* neither TIOCGWINSZ nor WINDOWS */
1600 }
1601 \f
1602 /* Whether the rnd system (either rand or [dl]rand48) has been
1603    seeded.  */
1604 static int rnd_seeded;
1605
1606 /* Return a random number between 0 and MAX-1, inclusive.
1607
1608    If the system does not support lrand48 and MAX is greater than the
1609    value of RAND_MAX+1 on the system, the returned value will be in
1610    the range [0, RAND_MAX].  This may be fixed in a future release.
1611    The random number generator is seeded automatically the first time
1612    it is called.
1613
1614    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1615    for cryptography.  It is only meant to be used in situations where
1616    quality of the random numbers returned doesn't really matter.  */
1617
1618 int
1619 random_number (int max)
1620 {
1621 #ifdef HAVE_DRAND48
1622   if (!rnd_seeded)
1623     {
1624       srand48 ((long) time (NULL) ^ (long) getpid ());
1625       rnd_seeded = 1;
1626     }
1627   return lrand48 () % max;
1628 #else  /* not HAVE_DRAND48 */
1629
1630   double bounded;
1631   int rnd;
1632   if (!rnd_seeded)
1633     {
1634       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1635       rnd_seeded = 1;
1636     }
1637   rnd = rand ();
1638
1639   /* Like rand() % max, but uses the high-order bits for better
1640      randomness on architectures where rand() is implemented using a
1641      simple congruential generator.  */
1642
1643   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1644   return (int) bounded;
1645
1646 #endif /* not HAVE_DRAND48 */
1647 }
1648
1649 /* Return a random uniformly distributed floating point number in the
1650    [0, 1) range.  Uses drand48 where available, and a really lame
1651    kludge elsewhere.  */
1652
1653 double
1654 random_float (void)
1655 {
1656 #ifdef HAVE_DRAND48
1657   if (!rnd_seeded)
1658     {
1659       srand48 ((long) time (NULL) ^ (long) getpid ());
1660       rnd_seeded = 1;
1661     }
1662   return drand48 ();
1663 #else  /* not HAVE_DRAND48 */
1664   return (  random_number (10000) / 10000.0
1665           + random_number (10000) / (10000.0 * 10000.0)
1666           + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1667           + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1668 #endif /* not HAVE_DRAND48 */
1669 }
1670 \f
1671 /* Implementation of run_with_timeout, a generic timeout-forcing
1672    routine for systems with Unix-like signal handling.  */
1673
1674 #ifdef USE_SIGNAL_TIMEOUT
1675 # ifdef HAVE_SIGSETJMP
1676 #  define SETJMP(env) sigsetjmp (env, 1)
1677
1678 static sigjmp_buf run_with_timeout_env;
1679
1680 static void
1681 abort_run_with_timeout (int sig)
1682 {
1683   assert (sig == SIGALRM);
1684   siglongjmp (run_with_timeout_env, -1);
1685 }
1686 # else /* not HAVE_SIGSETJMP */
1687 #  define SETJMP(env) setjmp (env)
1688
1689 static jmp_buf run_with_timeout_env;
1690
1691 static void
1692 abort_run_with_timeout (int sig)
1693 {
1694   assert (sig == SIGALRM);
1695   /* We don't have siglongjmp to preserve the set of blocked signals;
1696      if we longjumped out of the handler at this point, SIGALRM would
1697      remain blocked.  We must unblock it manually. */
1698   int mask = siggetmask ();
1699   mask &= ~sigmask (SIGALRM);
1700   sigsetmask (mask);
1701
1702   /* Now it's safe to longjump. */
1703   longjmp (run_with_timeout_env, -1);
1704 }
1705 # endif /* not HAVE_SIGSETJMP */
1706
1707 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1708    setitimer where available, alarm otherwise.
1709
1710    TIMEOUT should be non-zero.  If the timeout value is so small that
1711    it would be rounded to zero, it is rounded to the least legal value
1712    instead (1us for setitimer, 1s for alarm).  That ensures that
1713    SIGALRM will be delivered in all cases.  */
1714
1715 static void
1716 alarm_set (double timeout)
1717 {
1718 #ifdef ITIMER_REAL
1719   /* Use the modern itimer interface. */
1720   struct itimerval itv;
1721   xzero (itv);
1722   itv.it_value.tv_sec = (long) timeout;
1723   itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1724   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1725     /* Ensure that we wait for at least the minimum interval.
1726        Specifying zero would mean "wait forever".  */
1727     itv.it_value.tv_usec = 1;
1728   setitimer (ITIMER_REAL, &itv, NULL);
1729 #else  /* not ITIMER_REAL */
1730   /* Use the old alarm() interface. */
1731   int secs = (int) timeout;
1732   if (secs == 0)
1733     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1734        because alarm(0) means "never deliver the alarm", i.e. "wait
1735        forever", which is not what someone who specifies a 0.5s
1736        timeout would expect.  */
1737     secs = 1;
1738   alarm (secs);
1739 #endif /* not ITIMER_REAL */
1740 }
1741
1742 /* Cancel the alarm set with alarm_set. */
1743
1744 static void
1745 alarm_cancel (void)
1746 {
1747 #ifdef ITIMER_REAL
1748   struct itimerval disable;
1749   xzero (disable);
1750   setitimer (ITIMER_REAL, &disable, NULL);
1751 #else  /* not ITIMER_REAL */
1752   alarm (0);
1753 #endif /* not ITIMER_REAL */
1754 }
1755
1756 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1757    seconds.  Returns true if the function was interrupted with a
1758    timeout, false otherwise.
1759
1760    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1761    using setitimer() or alarm().  The timeout is enforced by
1762    longjumping out of the SIGALRM handler.  This has several
1763    advantages compared to the traditional approach of relying on
1764    signals causing system calls to exit with EINTR:
1765
1766      * The callback function is *forcibly* interrupted after the
1767        timeout expires, (almost) regardless of what it was doing and
1768        whether it was in a syscall.  For example, a calculation that
1769        takes a long time is interrupted as reliably as an IO
1770        operation.
1771
1772      * It works with both SYSV and BSD signals because it doesn't
1773        depend on the default setting of SA_RESTART.
1774
1775      * It doesn't require special handler setup beyond a simple call
1776        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1777        optional.)
1778
1779    The only downside is that, if FUN allocates internal resources that
1780    are normally freed prior to exit from the functions, they will be
1781    lost in case of timeout.  */
1782
1783 bool
1784 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1785 {
1786   int saved_errno;
1787
1788   if (timeout == 0)
1789     {
1790       fun (arg);
1791       return false;
1792     }
1793
1794   signal (SIGALRM, abort_run_with_timeout);
1795   if (SETJMP (run_with_timeout_env) != 0)
1796     {
1797       /* Longjumped out of FUN with a timeout. */
1798       signal (SIGALRM, SIG_DFL);
1799       return true;
1800     }
1801   alarm_set (timeout);
1802   fun (arg);
1803
1804   /* Preserve errno in case alarm() or signal() modifies it. */
1805   saved_errno = errno;
1806   alarm_cancel ();
1807   signal (SIGALRM, SIG_DFL);
1808   errno = saved_errno;
1809
1810   return false;
1811 }
1812
1813 #else  /* not USE_SIGNAL_TIMEOUT */
1814
1815 #ifndef WINDOWS
1816 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
1817    define it under Windows, because Windows has its own version of
1818    run_with_timeout that uses threads.  */
1819
1820 int
1821 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1822 {
1823   fun (arg);
1824   return false;
1825 }
1826 #endif /* not WINDOWS */
1827 #endif /* not USE_SIGNAL_TIMEOUT */
1828 \f
1829 #ifndef WINDOWS
1830
1831 /* Sleep the specified amount of seconds.  On machines without
1832    nanosleep(), this may sleep shorter if interrupted by signals.  */
1833
1834 void
1835 xsleep (double seconds)
1836 {
1837 #ifdef HAVE_NANOSLEEP
1838   /* nanosleep is the preferred interface because it offers high
1839      accuracy and, more importantly, because it allows us to reliably
1840      restart receiving a signal such as SIGWINCH.  (There was an
1841      actual Debian bug report about --limit-rate malfunctioning while
1842      the terminal was being resized.)  */
1843   struct timespec sleep, remaining;
1844   sleep.tv_sec = (long) seconds;
1845   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1846   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1847     /* If nanosleep has been interrupted by a signal, adjust the
1848        sleeping period and return to sleep.  */
1849     sleep = remaining;
1850 #elif defined(HAVE_USLEEP)
1851   /* If usleep is available, use it in preference to select.  */
1852   if (seconds >= 1)
1853     {
1854       /* On some systems, usleep cannot handle values larger than
1855          1,000,000.  If the period is larger than that, use sleep
1856          first, then add usleep for subsecond accuracy.  */
1857       sleep (seconds);
1858       seconds -= (long) seconds;
1859     }
1860   usleep (seconds * 1000000);
1861 #else /* fall back select */
1862   /* Note that, although Windows supports select, it can't be used to
1863      implement sleeping because Winsock's select doesn't implement
1864      timeout when it is passed NULL pointers for all fd sets.  (But it
1865      does under Cygwin, which implements Unix-compatible select.)  */
1866   struct timeval sleep;
1867   sleep.tv_sec = (long) seconds;
1868   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
1869   select (0, NULL, NULL, NULL, &sleep);
1870   /* If select returns -1 and errno is EINTR, it means we were
1871      interrupted by a signal.  But without knowing how long we've
1872      actually slept, we can't return to sleep.  Using gettimeofday to
1873      track sleeps is slow and unreliable due to clock skew.  */
1874 #endif
1875 }
1876
1877 #endif /* not WINDOWS */
1878
1879 /* Encode the octets in DATA of length LENGTH to base64 format,
1880    storing the result to DEST.  The output will be zero-terminated,
1881    and must point to a writable buffer of at least
1882    1+BASE64_LENGTH(length) bytes.  The function returns the length of
1883    the resulting base64 data, not counting the terminating zero.
1884
1885    This implementation does not emit newlines after 76 characters of
1886    base64 data.  */
1887
1888 int
1889 base64_encode (const void *data, int length, char *dest)
1890 {
1891   /* Conversion table.  */
1892   static const char tbl[64] = {
1893     'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
1894     'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
1895     'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
1896     'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
1897   };
1898   /* Access bytes in DATA as unsigned char, otherwise the shifts below
1899      don't work for data with MSB set. */
1900   const unsigned char *s = data;
1901   /* Theoretical ANSI violation when length < 3. */
1902   const unsigned char *end = (const unsigned char *) data + length - 2;
1903   char *p = dest;
1904
1905   /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
1906   for (; s < end; s += 3)
1907     {
1908       *p++ = tbl[s[0] >> 2];
1909       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1910       *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
1911       *p++ = tbl[s[2] & 0x3f];
1912     }
1913
1914   /* Pad the result if necessary...  */
1915   switch (length % 3)
1916     {
1917     case 1:
1918       *p++ = tbl[s[0] >> 2];
1919       *p++ = tbl[(s[0] & 3) << 4];
1920       *p++ = '=';
1921       *p++ = '=';
1922       break;
1923     case 2:
1924       *p++ = tbl[s[0] >> 2];
1925       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1926       *p++ = tbl[((s[1] & 0xf) << 2)];
1927       *p++ = '=';
1928       break;
1929     }
1930   /* ...and zero-terminate it.  */
1931   *p = '\0';
1932
1933   return p - dest;
1934 }
1935
1936 /* Store in C the next non-whitespace character from the string, or \0
1937    when end of string is reached.  */
1938 #define NEXT_CHAR(c, p) do {                    \
1939   c = (unsigned char) *p++;                     \
1940 } while (ISSPACE (c))
1941
1942 #define IS_ASCII(c) (((c) & 0x80) == 0)
1943
1944 /* Decode data from BASE64 (a null-terminated string) into memory
1945    pointed to by DEST.  DEST is assumed to be large enough to
1946    accomodate the decoded data, which is guaranteed to be no more than
1947    3/4*strlen(base64).
1948
1949    Since DEST is assumed to contain binary data, it is not
1950    NUL-terminated.  The function returns the length of the data
1951    written to TO.  -1 is returned in case of error caused by malformed
1952    base64 input.
1953
1954    This function originates from Free Recode.  */
1955
1956 int
1957 base64_decode (const char *base64, void *dest)
1958 {
1959   /* Table of base64 values for first 128 characters.  Note that this
1960      assumes ASCII (but so does Wget in other places).  */
1961   static const signed char base64_char_to_value[128] =
1962     {
1963       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
1964       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
1965       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
1966       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
1967       -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
1968       54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
1969       -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
1970       5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
1971       15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
1972       25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
1973       29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
1974       39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
1975       49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
1976     };
1977 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
1978 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
1979
1980   const char *p = base64;
1981   char *q = dest;
1982
1983   while (1)
1984     {
1985       unsigned char c;
1986       unsigned long value;
1987
1988       /* Process first byte of a quadruplet.  */
1989       NEXT_CHAR (c, p);
1990       if (!c)
1991         break;
1992       if (c == '=' || !IS_BASE64 (c))
1993         return -1;              /* illegal char while decoding base64 */
1994       value = BASE64_CHAR_TO_VALUE (c) << 18;
1995
1996       /* Process second byte of a quadruplet.  */
1997       NEXT_CHAR (c, p);
1998       if (!c)
1999         return -1;              /* premature EOF while decoding base64 */
2000       if (c == '=' || !IS_BASE64 (c))
2001         return -1;              /* illegal char while decoding base64 */
2002       value |= BASE64_CHAR_TO_VALUE (c) << 12;
2003       *q++ = value >> 16;
2004
2005       /* Process third byte of a quadruplet.  */
2006       NEXT_CHAR (c, p);
2007       if (!c)
2008         return -1;              /* premature EOF while decoding base64 */
2009       if (!IS_BASE64 (c))
2010         return -1;              /* illegal char while decoding base64 */
2011
2012       if (c == '=')
2013         {
2014           NEXT_CHAR (c, p);
2015           if (!c)
2016             return -1;          /* premature EOF while decoding base64 */
2017           if (c != '=')
2018             return -1;          /* padding `=' expected but not found */
2019           continue;
2020         }
2021
2022       value |= BASE64_CHAR_TO_VALUE (c) << 6;
2023       *q++ = 0xff & value >> 8;
2024
2025       /* Process fourth byte of a quadruplet.  */
2026       NEXT_CHAR (c, p);
2027       if (!c)
2028         return -1;              /* premature EOF while decoding base64 */
2029       if (c == '=')
2030         continue;
2031       if (!IS_BASE64 (c))
2032         return -1;              /* illegal char while decoding base64 */
2033
2034       value |= BASE64_CHAR_TO_VALUE (c);
2035       *q++ = 0xff & value;
2036     }
2037 #undef IS_BASE64
2038 #undef BASE64_CHAR_TO_VALUE
2039
2040   return q - (char *) dest;
2041 }
2042
2043 #undef IS_ASCII
2044 #undef NEXT_CHAR
2045 \f
2046 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2047    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2048
2049 static void
2050 mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2051                     int (*cmpfun) (const void *, const void *))
2052 {
2053 #define ELT(array, pos) ((char *)(array) + (pos) * size)
2054   if (from < to)
2055     {
2056       size_t i, j, k;
2057       size_t mid = (to + from) / 2;
2058       mergesort_internal (base, temp, size, from, mid, cmpfun);
2059       mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2060       i = from;
2061       j = mid + 1;
2062       for (k = from; (i <= mid) && (j <= to); k++)
2063         if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2064           memcpy (ELT (temp, k), ELT (base, i++), size);
2065         else
2066           memcpy (ELT (temp, k), ELT (base, j++), size);
2067       while (i <= mid)
2068         memcpy (ELT (temp, k++), ELT (base, i++), size);
2069       while (j <= to)
2070         memcpy (ELT (temp, k++), ELT (base, j++), size);
2071       for (k = from; k <= to; k++)
2072         memcpy (ELT (base, k), ELT (temp, k), size);
2073     }
2074 #undef ELT
2075 }
2076
2077 /* Stable sort with interface exactly like standard library's qsort.
2078    Uses mergesort internally, allocating temporary storage with
2079    alloca.  */
2080
2081 void
2082 stable_sort (void *base, size_t nmemb, size_t size,
2083              int (*cmpfun) (const void *, const void *))
2084 {
2085   if (size > 1)
2086     {
2087       void *temp = alloca (nmemb * size * sizeof (void *));
2088       mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2089     }
2090 }
2091 \f
2092 /* Print a decimal number.  If it is equal to or larger than ten, the
2093    number is rounded.  Otherwise it is printed with one significant
2094    digit without trailing zeros and with no more than three fractional
2095    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2096    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2097
2098    This is useful for displaying durations because it provides
2099    order-of-magnitude information without unnecessary clutter --
2100    long-running downloads are shown without the fractional part, and
2101    short ones still retain one significant digit.  */
2102
2103 const char *
2104 print_decimal (double number)
2105 {
2106   static char buf[32];
2107   double n = number >= 0 ? number : -number;
2108
2109   if (n >= 9.95)
2110     /* Cut off at 9.95 because the below %.1f would round 9.96 to
2111        "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2112     snprintf (buf, sizeof buf, "%.0f", number);
2113   else if (n >= 0.95)
2114     snprintf (buf, sizeof buf, "%.1f", number);
2115   else if (n >= 0.001)
2116     snprintf (buf, sizeof buf, "%.1g", number);
2117   else if (n >= 0.0005)
2118     /* round [0.0005, 0.001) to 0.001 */
2119     snprintf (buf, sizeof buf, "%.3f", number);
2120   else
2121     /* print numbers close to 0 as 0, not 0.000 */
2122     strcpy (buf, "0");
2123
2124   return buf;
2125 }
2126
2127 #ifdef TESTING
2128
2129 const char *
2130 test_subdir_p()
2131 {
2132   int i;
2133   struct {
2134     char *d1;
2135     char *d2;
2136     bool result;
2137   } test_array[] = {
2138     { "/somedir", "/somedir", true },
2139     { "/somedir", "/somedir/d2", true },
2140     { "/somedir/d1", "/somedir", false },
2141   };
2142
2143   for (i = 0; i < countof(test_array); ++i)
2144     {
2145       bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2146
2147       mu_assert ("test_subdir_p: wrong result",
2148                  res == test_array[i].result);
2149     }
2150
2151   return NULL;
2152 }
2153
2154 const char *
2155 test_dir_matches_p()
2156 {
2157   int i;
2158   struct {
2159     char *dirlist[3];
2160     char *dir;
2161     bool result;
2162   } test_array[] = {
2163     { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2164     { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2165     { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2166     { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2167     { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2168   };
2169
2170   for (i = 0; i < countof(test_array); ++i)
2171     {
2172       bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2173
2174       mu_assert ("test_dir_matches_p: wrong result",
2175                  res == test_array[i].result);
2176     }
2177
2178   return NULL;
2179 }
2180
2181 #endif /* TESTING */
2182