sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996-2006 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9 (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software Foundation, Inc.,
  18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <time.h>
  36 #ifdef HAVE_SYS_TIME_H
  37 # include <sys/time.h>
  38 #endif
  39 #ifdef HAVE_UNISTD_H
  40 # include <unistd.h>
  41 #endif
  42 #ifdef HAVE_MMAP
  43 # include <sys/mman.h>
  44 #endif
  45 #ifdef HAVE_UTIME_H
  46 # include <utime.h>
  47 #endif
  48 #ifdef HAVE_SYS_UTIME_H
  49 # include <sys/utime.h>
  50 #endif
  51 #include <errno.h>
  52 #include <fcntl.h>
  53 #include <assert.h>
  54 #include <stdarg.h>
  55 #include <locale.h>
  56
  57 /* For TIOCGWINSZ and friends: */
  58 #ifdef HAVE_SYS_IOCTL_H
  59 # include <sys/ioctl.h>
  60 #endif
  61 #ifdef HAVE_TERMIOS_H
  62 # include <termios.h>
  63 #endif
  64
  65 /* Needed for Unix version of run_with_timeout. */
  66 #include <signal.h>
  67 #include <setjmp.h>
  68
  69 #ifndef HAVE_SIGSETJMP
  70 /* If sigsetjmp is a macro, configure won't pick it up. */
  71 # ifdef sigsetjmp
  72 #  define HAVE_SIGSETJMP
  73 # endif
  74 #endif
  75
  76 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  77 # define USE_SIGNAL_TIMEOUT
  78 #endif
  79
  80 #include "wget.h"
  81 #include "utils.h"
  82 #include "hash.h"
  83
  84 #ifdef TESTING
  85 #include "test.h"
  86 #endif
  87
  88 /* Utility function: like xstrdup(), but also lowercases S.  */
  89
  90 char *
  91 xstrdup_lower (const char *s)
  92 {
  93   char *copy = xstrdup (s);
  94   char *p = copy;
  95   for (; *p; p++)
  96     *p = TOLOWER (*p);
  97   return copy;
  98 }
  99
 100 /* Copy the string formed by two pointers (one on the beginning, other
 101    on the char after the last char) to a new, malloc-ed location.
 102    0-terminate it.  */
 103 char *
 104 strdupdelim (const char *beg, const char *end)
 105 {
 106   char *res = xmalloc (end - beg + 1);
 107   memcpy (res, beg, end - beg);
 108   res[end - beg] = '\0';
 109   return res;
 110 }
 111
 112 /* Parse a string containing comma-separated elements, and return a
 113    vector of char pointers with the elements.  Spaces following the
 114    commas are ignored.  */
 115 char **
 116 sepstring (const char *s)
 117 {
 118   char **res;
 119   const char *p;
 120   int i = 0;
 121
 122   if (!s || !*s)
 123     return NULL;
 124   res = NULL;
 125   p = s;
 126   while (*s)
 127     {
 128       if (*s == ',')
 129         {
 130           res = xrealloc (res, (i + 2) * sizeof (char *));
 131           res[i] = strdupdelim (p, s);
 132           res[++i] = NULL;
 133           ++s;
 134           /* Skip the blanks following the ','.  */
 135           while (ISSPACE (*s))
 136             ++s;
 137           p = s;
 138         }
 139       else
 140         ++s;
 141     }
 142   res = xrealloc (res, (i + 2) * sizeof (char *));
 143   res[i] = strdupdelim (p, s);
 144   res[i + 1] = NULL;
 145   return res;
 146 }
 147 \f
 148 /* Like sprintf, but prints into a string of sufficient size freshly
 149    allocated with malloc, which is returned.  If unable to print due
 150    to invalid format, returns NULL.  Inability to allocate needed
 151    memory results in abort, as with xmalloc.  This is in spirit
 152    similar to the GNU/BSD extension asprintf, but somewhat easier to
 153    use.
 154
 155    Internally the function either calls vasprintf or loops around
 156    vsnprintf until the correct size is found.  Since Wget also ships a
 157    fallback implementation of vsnprintf, this should be portable.  */
 158
 159 char *
 160 aprintf (const char *fmt, ...)
 161 {
 162 #ifdef HAVE_VASPRINTF
 163   /* Use vasprintf. */
 164   int ret;
 165   va_list args;
 166   char *str;
 167   va_start (args, fmt);
 168   ret = vasprintf (&str, fmt, args);
 169   va_end (args);
 170   if (ret < 0 && errno == ENOMEM)
 171     abort ();                   /* for consistency with xmalloc/xrealloc */
 172   else if (ret < 0)
 173     return NULL;
 174   return str;
 175 #else  /* not HAVE_VASPRINTF */
 176
 177   /* vasprintf is unavailable.  snprintf into a small buffer and
 178      resize it as necessary. */
 179   int size = 32;
 180   char *str = xmalloc (size);
 181
 182   /* #### This code will infloop and eventually abort in xrealloc if
 183      passed a FMT that causes snprintf to consistently return -1.  */
 184
 185   while (1)
 186     {
 187       int n;
 188       va_list args;
 189
 190       va_start (args, fmt);
 191       n = vsnprintf (str, size, fmt, args);
 192       va_end (args);
 193
 194       /* If the printing worked, return the string. */
 195       if (n > -1 && n < size)
 196         return str;
 197
 198       /* Else try again with a larger buffer. */
 199       if (n > -1)               /* C99 */
 200         size = n + 1;           /* precisely what is needed */
 201       else
 202         size <<= 1;             /* twice the old size */
 203       str = xrealloc (str, size);
 204     }
 205 #endif /* not HAVE_VASPRINTF */
 206 }
 207
 208 /* Concatenate the NULL-terminated list of string arguments into
 209    freshly allocated space.  */
 210
 211 char *
 212 concat_strings (const char *str0, ...)
 213 {
 214   va_list args;
 215   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 216   char *ret, *p;
 217
 218   const char *next_str;
 219   int total_length = 0;
 220   int argcount;
 221
 222   /* Calculate the length of and allocate the resulting string. */
 223
 224   argcount = 0;
 225   va_start (args, str0);
 226   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 227     {
 228       int len = strlen (next_str);
 229       if (argcount < countof (saved_lengths))
 230         saved_lengths[argcount++] = len;
 231       total_length += len;
 232     }
 233   va_end (args);
 234   p = ret = xmalloc (total_length + 1);
 235
 236   /* Copy the strings into the allocated space. */
 237
 238   argcount = 0;
 239   va_start (args, str0);
 240   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 241     {
 242       int len;
 243       if (argcount < countof (saved_lengths))
 244         len = saved_lengths[argcount++];
 245       else
 246         len = strlen (next_str);
 247       memcpy (p, next_str, len);
 248       p += len;
 249     }
 250   va_end (args);
 251   *p = '\0';
 252
 253   return ret;
 254 }
 255 \f
 256 /* Return pointer to a static char[] buffer in which zero-terminated
 257    string-representation of TM (in form hh:mm:ss) is printed.
 258
 259    If TM is NULL, the current time will be used.  */
 260
 261 char *
 262 time_str (time_t *tm)
 263 {
 264   static char output[15];
 265   struct tm *ptm;
 266   time_t secs = tm ? *tm : time (NULL);
 267
 268   if (secs == -1)
 269     {
 270       /* In case of error, return the empty string.  Maybe we should
 271          just abort if this happens?  */
 272       *output = '\0';
 273       return output;
 274     }
 275   ptm = localtime (&secs);
 276   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 277   return output;
 278 }
 279
 280 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 281
 282 char *
 283 datetime_str (time_t *tm)
 284 {
 285   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 286   struct tm *ptm;
 287   time_t secs = tm ? *tm : time (NULL);
 288
 289   if (secs == -1)
 290     {
 291       /* In case of error, return the empty string.  Maybe we should
 292          just abort if this happens?  */
 293       *output = '\0';
 294       return output;
 295     }
 296   ptm = localtime (&secs);
 297   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 298            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 299            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 300   return output;
 301 }
 302 \f
 303 /* The Windows versions of the following two functions are defined in
 304    mswindows.c.  */
 305
 306 #ifndef WINDOWS
 307 void
 308 fork_to_background (void)
 309 {
 310   pid_t pid;
 311   /* Whether we arrange our own version of opt.lfilename here.  */
 312   bool logfile_changed = false;
 313
 314   if (!opt.lfilename)
 315     {
 316       /* We must create the file immediately to avoid either a race
 317          condition (which arises from using unique_name and failing to
 318          use fopen_excl) or lying to the user about the log file name
 319          (which arises from using unique_name, printing the name, and
 320          using fopen_excl later on.)  */
 321       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 322       if (new_log_fp)
 323         {
 324           logfile_changed = true;
 325           fclose (new_log_fp);
 326         }
 327     }
 328   pid = fork ();
 329   if (pid < 0)
 330     {
 331       /* parent, error */
 332       perror ("fork");
 333       exit (1);
 334     }
 335   else if (pid != 0)
 336     {
 337       /* parent, no error */
 338       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 339       if (logfile_changed)
 340         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 341       exit (0);                 /* #### should we use _exit()? */
 342     }
 343
 344   /* child: give up the privileges and keep running. */
 345   setsid ();
 346   freopen ("/dev/null", "r", stdin);
 347   freopen ("/dev/null", "w", stdout);
 348   freopen ("/dev/null", "w", stderr);
 349 }
 350 #endif /* not WINDOWS */
 351 \f
 352 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 353    specified with TM.  The atime ("access time") is set to the current
 354    time.  */
 355
 356 void
 357 touch (const char *file, time_t tm)
 358 {
 359 #ifdef HAVE_STRUCT_UTIMBUF
 360   struct utimbuf times;
 361 #else
 362   struct {
 363     time_t actime;
 364     time_t modtime;
 365   } times;
 366 #endif
 367   times.modtime = tm;
 368   times.actime = time (NULL);
 369   if (utime (file, &times) == -1)
 370     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 371 }
 372
 373 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 374    nothing under MS-Windows.  */
 375 int
 376 remove_link (const char *file)
 377 {
 378   int err = 0;
 379   struct_stat st;
 380
 381   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 382     {
 383       DEBUGP (("Unlinking %s (symlink).\n", file));
 384       err = unlink (file);
 385       if (err != 0)
 386         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 387                    file, strerror (errno));
 388     }
 389   return err;
 390 }
 391
 392 /* Does FILENAME exist?  This is quite a lousy implementation, since
 393    it supplies no error codes -- only a yes-or-no answer.  Thus it
 394    will return that a file does not exist if, e.g., the directory is
 395    unreadable.  I don't mind it too much currently, though.  The
 396    proper way should, of course, be to have a third, error state,
 397    other than true/false, but that would introduce uncalled-for
 398    additional complexity to the callers.  */
 399 bool
 400 file_exists_p (const char *filename)
 401 {
 402 #ifdef HAVE_ACCESS
 403   return access (filename, F_OK) >= 0;
 404 #else
 405   struct_stat buf;
 406   return stat (filename, &buf) >= 0;
 407 #endif
 408 }
 409
 410 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 411    Returns 0 on error.  */
 412 bool
 413 file_non_directory_p (const char *path)
 414 {
 415   struct_stat buf;
 416   /* Use lstat() rather than stat() so that symbolic links pointing to
 417      directories can be identified correctly.  */
 418   if (lstat (path, &buf) != 0)
 419     return false;
 420   return S_ISDIR (buf.st_mode) ? false : true;
 421 }
 422
 423 /* Return the size of file named by FILENAME, or -1 if it cannot be
 424    opened or seeked into. */
 425 wgint
 426 file_size (const char *filename)
 427 {
 428 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 429   wgint size;
 430   /* We use fseek rather than stat to determine the file size because
 431      that way we can also verify that the file is readable without
 432      explicitly checking for permissions.  Inspired by the POST patch
 433      by Arnaud Wylie.  */
 434   FILE *fp = fopen (filename, "rb");
 435   if (!fp)
 436     return -1;
 437   fseeko (fp, 0, SEEK_END);
 438   size = ftello (fp);
 439   fclose (fp);
 440   return size;
 441 #else
 442   struct_stat st;
 443   if (stat (filename, &st) < 0)
 444     return -1;
 445   return st.st_size;
 446 #endif
 447 }
 448
 449 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 450    doesn't exist is found.  Return a freshly allocated copy of the
 451    unused file name.  */
 452
 453 static char *
 454 unique_name_1 (const char *prefix)
 455 {
 456   int count = 1;
 457   int plen = strlen (prefix);
 458   char *template = (char *)alloca (plen + 1 + 24);
 459   char *template_tail = template + plen;
 460
 461   memcpy (template, prefix, plen);
 462   *template_tail++ = '.';
 463
 464   do
 465     number_to_string (template_tail, count++);
 466   while (file_exists_p (template));
 467
 468   return xstrdup (template);
 469 }
 470
 471 /* Return a unique file name, based on FILE.
 472
 473    More precisely, if FILE doesn't exist, it is returned unmodified.
 474    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 475    file name that doesn't exist is returned.
 476
 477    The resulting file is not created, only verified that it didn't
 478    exist at the point in time when the function was called.
 479    Therefore, where security matters, don't rely that the file created
 480    by this function exists until you open it with O_EXCL or
 481    equivalent.
 482
 483    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 484    string.  Otherwise, it may return FILE if the file doesn't exist
 485    (and therefore doesn't need changing).  */
 486
 487 char *
 488 unique_name (const char *file, bool allow_passthrough)
 489 {
 490   /* If the FILE itself doesn't exist, return it without
 491      modification. */
 492   if (!file_exists_p (file))
 493     return allow_passthrough ? (char *)file : xstrdup (file);
 494
 495   /* Otherwise, find a numeric suffix that results in unused file name
 496      and return it.  */
 497   return unique_name_1 (file);
 498 }
 499
 500 /* Create a file based on NAME, except without overwriting an existing
 501    file with that name.  Providing O_EXCL is correctly implemented,
 502    this function does not have the race condition associated with
 503    opening the file returned by unique_name.  */
 504
 505 FILE *
 506 unique_create (const char *name, bool binary, char **opened_name)
 507 {
 508   /* unique file name, based on NAME */
 509   char *uname = unique_name (name, false);
 510   FILE *fp;
 511   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 512     {
 513       xfree (uname);
 514       uname = unique_name (name, false);
 515     }
 516   if (opened_name && fp != NULL)
 517     {
 518       if (fp)
 519         *opened_name = uname;
 520       else
 521         {
 522           *opened_name = NULL;
 523           xfree (uname);
 524         }
 525     }
 526   else
 527     xfree (uname);
 528   return fp;
 529 }
 530
 531 /* Open the file for writing, with the addition that the file is
 532    opened "exclusively".  This means that, if the file already exists,
 533    this function will *fail* and errno will be set to EEXIST.  If
 534    BINARY is set, the file will be opened in binary mode, equivalent
 535    to fopen's "wb".
 536
 537    If opening the file fails for any reason, including the file having
 538    previously existed, this function returns NULL and sets errno
 539    appropriately.  */
 540
 541 FILE *
 542 fopen_excl (const char *fname, bool binary)
 543 {
 544   int fd;
 545 #ifdef O_EXCL
 546   int flags = O_WRONLY | O_CREAT | O_EXCL;
 547 # ifdef O_BINARY
 548   if (binary)
 549     flags |= O_BINARY;
 550 # endif
 551   fd = open (fname, flags, 0666);
 552   if (fd < 0)
 553     return NULL;
 554   return fdopen (fd, binary ? "wb" : "w");
 555 #else  /* not O_EXCL */
 556   /* Manually check whether the file exists.  This is prone to race
 557      conditions, but systems without O_EXCL haven't deserved
 558      better.  */
 559   if (file_exists_p (fname))
 560     {
 561       errno = EEXIST;
 562       return NULL;
 563     }
 564   return fopen (fname, binary ? "wb" : "w");
 565 #endif /* not O_EXCL */
 566 }
 567 \f
 568 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 569    are missing, create them first.  In case any mkdir() call fails,
 570    return its error status.  Returns 0 on successful completion.
 571
 572    The behaviour of this function should be identical to the behaviour
 573    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 574 int
 575 make_directory (const char *directory)
 576 {
 577   int i, ret, quit = 0;
 578   char *dir;
 579
 580   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 581      function is unsafe if called with a read-only char *argument.  */
 582   STRDUP_ALLOCA (dir, directory);
 583
 584   /* If the first character of dir is '/', skip it (and thus enable
 585      creation of absolute-pathname directories.  */
 586   for (i = (*dir == '/'); 1; ++i)
 587     {
 588       for (; dir[i] && dir[i] != '/'; i++)
 589         ;
 590       if (!dir[i])
 591         quit = 1;
 592       dir[i] = '\0';
 593       /* Check whether the directory already exists.  Allow creation of
 594          of intermediate directories to fail, as the initial path components
 595          are not necessarily directories!  */
 596       if (!file_exists_p (dir))
 597         ret = mkdir (dir, 0777);
 598       else
 599         ret = 0;
 600       if (quit)
 601         break;
 602       else
 603         dir[i] = '/';
 604     }
 605   return ret;
 606 }
 607
 608 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 609    should be a file name.
 610
 611    file_merge("/foo/bar", "baz")  => "/foo/baz"
 612    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 613    file_merge("foo", "bar")       => "bar"
 614
 615    In other words, it's a simpler and gentler version of uri_merge.  */
 616
 617 char *
 618 file_merge (const char *base, const char *file)
 619 {
 620   char *result;
 621   const char *cut = (const char *)strrchr (base, '/');
 622
 623   if (!cut)
 624     return xstrdup (file);
 625
 626   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 627   memcpy (result, base, cut - base);
 628   result[cut - base] = '/';
 629   strcpy (result + (cut - base) + 1, file);
 630
 631   return result;
 632 }
 633 \f
 634 /* Like fnmatch, but performs a case-insensitive match.  */
 635
 636 int
 637 fnmatch_nocase (const char *pattern, const char *string, int flags)
 638 {
 639 #ifdef FNM_CASEFOLD
 640   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 641      also present on *BSD platforms, and possibly elsewhere.  */
 642   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 643 #else
 644   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 645   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 646   char *strcopy = (char *) alloca (strlen (string) + 1);
 647   char *p;
 648   for (p = patcopy; *pattern; pattern++, p++)
 649     *p = TOLOWER (*pattern);
 650   *p = '\0';
 651   for (p = strcopy; *string; string++, p++)
 652     *p = TOLOWER (*string);
 653   *p = '\0';
 654   return fnmatch (patcopy, strcopy, flags);
 655 #endif
 656 }
 657
 658 static bool in_acclist (const char *const *, const char *, bool);
 659
 660 /* Determine whether a file is acceptable to be followed, according to
 661    lists of patterns to accept/reject.  */
 662 bool
 663 acceptable (const char *s)
 664 {
 665   int l = strlen (s);
 666
 667   while (l && s[l] != '/')
 668     --l;
 669   if (s[l] == '/')
 670     s += (l + 1);
 671   if (opt.accepts)
 672     {
 673       if (opt.rejects)
 674         return (in_acclist ((const char *const *)opt.accepts, s, true)
 675                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 676       else
 677         return in_acclist ((const char *const *)opt.accepts, s, true);
 678     }
 679   else if (opt.rejects)
 680     return !in_acclist ((const char *const *)opt.rejects, s, true);
 681   return true;
 682 }
 683
 684 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 685    will return true if and only if D2 begins with `/something/' or is exactly
 686    '/something'.  */
 687 bool
 688 subdir_p (const char *d1, const char *d2)
 689 {
 690   if (!opt.ignore_case)
 691     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 692       ;
 693   else
 694     for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
 695       ;
 696
 697   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 698 }
 699
 700 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 701    first element that matches DIR, through wildcards or front comparison (as
 702    appropriate).  */
 703 static bool
 704 dir_matches_p (char **dirlist, const char *dir)
 705 {
 706   char **x;
 707   int (*matcher) (const char *, const char *, int)
 708     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 709
 710   for (x = dirlist; *x; x++)
 711     {
 712       /* Remove leading '/' */
 713       char *p = *x + (**x == '/');
 714       if (has_wildcards_p (p))
 715         {
 716           if (matcher (p, dir, FNM_PATHNAME) == 0)
 717             break;
 718         }
 719       else
 720         {
 721           if (subdir_p (p, dir))
 722             break;
 723         }
 724     }
 725
 726   return *x ? true : false;
 727 }
 728
 729 /* Returns whether DIRECTORY is acceptable for download, wrt the
 730    include/exclude lists.
 731
 732    The leading `/' is ignored in paths; relative and absolute paths
 733    may be freely intermixed.  */
 734
 735 bool
 736 accdir (const char *directory)
 737 {
 738   /* Remove starting '/'.  */
 739   if (*directory == '/')
 740     ++directory;
 741   if (opt.includes)
 742     {
 743       if (!dir_matches_p (opt.includes, directory))
 744         return false;
 745     }
 746   if (opt.excludes)
 747     {
 748       if (dir_matches_p (opt.excludes, directory))
 749         return false;
 750     }
 751   return true;
 752 }
 753
 754 /* Return true if STRING ends with TAIL.  For instance:
 755
 756    match_tail ("abc", "bc", false)  -> 1
 757    match_tail ("abc", "ab", false)  -> 0
 758    match_tail ("abc", "abc", false) -> 1
 759
 760    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 761
 762 bool
 763 match_tail (const char *string, const char *tail, bool fold_case)
 764 {
 765   int i, j;
 766
 767   /* We want this to be fast, so we code two loops, one with
 768      case-folding, one without. */
 769
 770   if (!fold_case)
 771     {
 772       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 773         if (string[i] != tail[j])
 774           break;
 775     }
 776   else
 777     {
 778       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 779         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 780           break;
 781     }
 782
 783   /* If the tail was exhausted, the match was succesful.  */
 784   if (j == -1)
 785     return true;
 786   else
 787     return false;
 788 }
 789
 790 /* Checks whether string S matches each element of ACCEPTS.  A list
 791    element are matched either with fnmatch() or match_tail(),
 792    according to whether the element contains wildcards or not.
 793
 794    If the BACKWARD is false, don't do backward comparison -- just compare
 795    them normally.  */
 796 static bool
 797 in_acclist (const char *const *accepts, const char *s, bool backward)
 798 {
 799   for (; *accepts; accepts++)
 800     {
 801       if (has_wildcards_p (*accepts))
 802         {
 803           int res = opt.ignore_case
 804             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 805           /* fnmatch returns 0 if the pattern *does* match the string.  */
 806           if (res == 0)
 807             return true;
 808         }
 809       else
 810         {
 811           if (backward)
 812             {
 813               if (match_tail (s, *accepts, opt.ignore_case))
 814                 return true;
 815             }
 816           else
 817             {
 818               int cmp = opt.ignore_case
 819                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 820               if (cmp == 0)
 821                 return true;
 822             }
 823         }
 824     }
 825   return false;
 826 }
 827
 828 /* Return the location of STR's suffix (file extension).  Examples:
 829    suffix ("foo.bar")       -> "bar"
 830    suffix ("foo.bar.baz")   -> "baz"
 831    suffix ("/foo/bar")      -> NULL
 832    suffix ("/foo.bar/baz")  -> NULL  */
 833 char *
 834 suffix (const char *str)
 835 {
 836   int i;
 837
 838   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 839     ;
 840
 841   if (str[i++] == '.')
 842     return (char *)str + i;
 843   else
 844     return NULL;
 845 }
 846
 847 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 848    `]').  */
 849
 850 bool
 851 has_wildcards_p (const char *s)
 852 {
 853   for (; *s; s++)
 854     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 855       return true;
 856   return false;
 857 }
 858
 859 /* Return true if FNAME ends with a typical HTML suffix.  The
 860    following (case-insensitive) suffixes are presumed to be HTML
 861    files:
 862
 863      html
 864      htm
 865      ?html (`?' matches one character)
 866
 867    #### CAVEAT.  This is not necessarily a good indication that FNAME
 868    refers to a file that contains HTML!  */
 869 bool
 870 has_html_suffix_p (const char *fname)
 871 {
 872   char *suf;
 873
 874   if ((suf = suffix (fname)) == NULL)
 875     return false;
 876   if (!strcasecmp (suf, "html"))
 877     return true;
 878   if (!strcasecmp (suf, "htm"))
 879     return true;
 880   if (suf[0] && !strcasecmp (suf + 1, "html"))
 881     return true;
 882   return false;
 883 }
 884
 885 /* Read a line from FP and return the pointer to freshly allocated
 886    storage.  The storage space is obtained through malloc() and should
 887    be freed with free() when it is no longer needed.
 888
 889    The length of the line is not limited, except by available memory.
 890    The newline character at the end of line is retained.  The line is
 891    terminated with a zero character.
 892
 893    After end-of-file is encountered without anything being read, NULL
 894    is returned.  NULL is also returned on error.  To distinguish
 895    between these two cases, use the stdio function ferror().  */
 896
 897 char *
 898 read_whole_line (FILE *fp)
 899 {
 900   int length = 0;
 901   int bufsize = 82;
 902   char *line = xmalloc (bufsize);
 903
 904   while (fgets (line + length, bufsize - length, fp))
 905     {
 906       length += strlen (line + length);
 907       if (length == 0)
 908         /* Possible for example when reading from a binary file where
 909            a line begins with \0.  */
 910         continue;
 911
 912       if (line[length - 1] == '\n')
 913         break;
 914
 915       /* fgets() guarantees to read the whole line, or to use up the
 916          space we've given it.  We can double the buffer
 917          unconditionally.  */
 918       bufsize <<= 1;
 919       line = xrealloc (line, bufsize);
 920     }
 921   if (length == 0 || ferror (fp))
 922     {
 923       xfree (line);
 924       return NULL;
 925     }
 926   if (length + 1 < bufsize)
 927     /* Relieve the memory from our exponential greediness.  We say
 928        `length + 1' because the terminating \0 is not included in
 929        LENGTH.  We don't need to zero-terminate the string ourselves,
 930        though, because fgets() does that.  */
 931     line = xrealloc (line, length + 1);
 932   return line;
 933 }
 934 \f
 935 /* Read FILE into memory.  A pointer to `struct file_memory' are
 936    returned; use struct element `content' to access file contents, and
 937    the element `length' to know the file length.  `content' is *not*
 938    zero-terminated, and you should *not* read or write beyond the [0,
 939    length) range of characters.
 940
 941    After you are done with the file contents, call read_file_free to
 942    release the memory.
 943
 944    Depending on the operating system and the type of file that is
 945    being read, read_file() either mmap's the file into memory, or
 946    reads the file into the core using read().
 947
 948    If file is named "-", fileno(stdin) is used for reading instead.
 949    If you want to read from a real file named "-", use "./-" instead.  */
 950
 951 struct file_memory *
 952 read_file (const char *file)
 953 {
 954   int fd;
 955   struct file_memory *fm;
 956   long size;
 957   bool inhibit_close = false;
 958
 959   /* Some magic in the finest tradition of Perl and its kin: if FILE
 960      is "-", just use stdin.  */
 961   if (HYPHENP (file))
 962     {
 963       fd = fileno (stdin);
 964       inhibit_close = true;
 965       /* Note that we don't inhibit mmap() in this case.  If stdin is
 966          redirected from a regular file, mmap() will still work.  */
 967     }
 968   else
 969     fd = open (file, O_RDONLY);
 970   if (fd < 0)
 971     return NULL;
 972   fm = xnew (struct file_memory);
 973
 974 #ifdef HAVE_MMAP
 975   {
 976     struct_fstat buf;
 977     if (fstat (fd, &buf) < 0)
 978       goto mmap_lose;
 979     fm->length = buf.st_size;
 980     /* NOTE: As far as I know, the callers of this function never
 981        modify the file text.  Relying on this would enable us to
 982        specify PROT_READ and MAP_SHARED for a marginal gain in
 983        efficiency, but at some cost to generality.  */
 984     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 985                         MAP_PRIVATE, fd, 0);
 986     if (fm->content == (char *)MAP_FAILED)
 987       goto mmap_lose;
 988     if (!inhibit_close)
 989       close (fd);
 990
 991     fm->mmap_p = 1;
 992     return fm;
 993   }
 994
 995  mmap_lose:
 996   /* The most common reason why mmap() fails is that FD does not point
 997      to a plain file.  However, it's also possible that mmap() doesn't
 998      work for a particular type of file.  Therefore, whenever mmap()
 999      fails, we just fall back to the regular method.  */
1000 #endif /* HAVE_MMAP */
1001
1002   fm->length = 0;
1003   size = 512;                   /* number of bytes fm->contents can
1004                                    hold at any given time. */
1005   fm->content = xmalloc (size);
1006   while (1)
1007     {
1008       wgint nread;
1009       if (fm->length > size / 2)
1010         {
1011           /* #### I'm not sure whether the whole exponential-growth
1012              thing makes sense with kernel read.  On Linux at least,
1013              read() refuses to read more than 4K from a file at a
1014              single chunk anyway.  But other Unixes might optimize it
1015              better, and it doesn't *hurt* anything, so I'm leaving
1016              it.  */
1017
1018           /* Normally, we grow SIZE exponentially to make the number
1019              of calls to read() and realloc() logarithmic in relation
1020              to file size.  However, read() can read an amount of data
1021              smaller than requested, and it would be unreasonable to
1022              double SIZE every time *something* was read.  Therefore,
1023              we double SIZE only when the length exceeds half of the
1024              entire allocated size.  */
1025           size <<= 1;
1026           fm->content = xrealloc (fm->content, size);
1027         }
1028       nread = read (fd, fm->content + fm->length, size - fm->length);
1029       if (nread > 0)
1030         /* Successful read. */
1031         fm->length += nread;
1032       else if (nread < 0)
1033         /* Error. */
1034         goto lose;
1035       else
1036         /* EOF */
1037         break;
1038     }
1039   if (!inhibit_close)
1040     close (fd);
1041   if (size > fm->length && fm->length != 0)
1042     /* Due to exponential growth of fm->content, the allocated region
1043        might be much larger than what is actually needed.  */
1044     fm->content = xrealloc (fm->content, fm->length);
1045   fm->mmap_p = 0;
1046   return fm;
1047
1048  lose:
1049   if (!inhibit_close)
1050     close (fd);
1051   xfree (fm->content);
1052   xfree (fm);
1053   return NULL;
1054 }
1055
1056 /* Release the resources held by FM.  Specifically, this calls
1057    munmap() or xfree() on fm->content, depending whether mmap or
1058    malloc/read were used to read in the file.  It also frees the
1059    memory needed to hold the FM structure itself.  */
1060
1061 void
1062 read_file_free (struct file_memory *fm)
1063 {
1064 #ifdef HAVE_MMAP
1065   if (fm->mmap_p)
1066     {
1067       munmap (fm->content, fm->length);
1068     }
1069   else
1070 #endif
1071     {
1072       xfree (fm->content);
1073     }
1074   xfree (fm);
1075 }
1076 \f
1077 /* Free the pointers in a NULL-terminated vector of pointers, then
1078    free the pointer itself.  */
1079 void
1080 free_vec (char **vec)
1081 {
1082   if (vec)
1083     {
1084       char **p = vec;
1085       while (*p)
1086         xfree (*p++);
1087       xfree (vec);
1088     }
1089 }
1090
1091 /* Append vector V2 to vector V1.  The function frees V2 and
1092    reallocates V1 (thus you may not use the contents of neither
1093    pointer after the call).  If V1 is NULL, V2 is returned.  */
1094 char **
1095 merge_vecs (char **v1, char **v2)
1096 {
1097   int i, j;
1098
1099   if (!v1)
1100     return v2;
1101   if (!v2)
1102     return v1;
1103   if (!*v2)
1104     {
1105       /* To avoid j == 0 */
1106       xfree (v2);
1107       return v1;
1108     }
1109   /* Count v1.  */
1110   for (i = 0; v1[i]; i++)
1111     ;
1112   /* Count v2.  */
1113   for (j = 0; v2[j]; j++)
1114     ;
1115   /* Reallocate v1.  */
1116   v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1117   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1118   xfree (v2);
1119   return v1;
1120 }
1121
1122 /* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1123    is allocated as needed.  Return the new value of the vector. */
1124
1125 char **
1126 vec_append (char **vec, const char *str)
1127 {
1128   int cnt;                      /* count of vector elements, including
1129                                    the one we're about to append */
1130   if (vec != NULL)
1131     {
1132       for (cnt = 0; vec[cnt]; cnt++)
1133         ;
1134       ++cnt;
1135     }
1136   else
1137     cnt = 1;
1138   /* Reallocate the array to fit the new element and the NULL. */
1139   vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1140   /* Append a copy of STR to the vector. */
1141   vec[cnt - 1] = xstrdup (str);
1142   vec[cnt] = NULL;
1143   return vec;
1144 }
1145 \f
1146 /* Sometimes it's useful to create "sets" of strings, i.e. special
1147    hash tables where you want to store strings as keys and merely
1148    query for their existence.  Here is a set of utility routines that
1149    makes that transparent.  */
1150
1151 void
1152 string_set_add (struct hash_table *ht, const char *s)
1153 {
1154   /* First check whether the set element already exists.  If it does,
1155      do nothing so that we don't have to free() the old element and
1156      then strdup() a new one.  */
1157   if (hash_table_contains (ht, s))
1158     return;
1159
1160   /* We use "1" as value.  It provides us a useful and clear arbitrary
1161      value, and it consumes no memory -- the pointers to the same
1162      string "1" will be shared by all the key-value pairs in all `set'
1163      hash tables.  */
1164   hash_table_put (ht, xstrdup (s), "1");
1165 }
1166
1167 /* Synonym for hash_table_contains... */
1168
1169 int
1170 string_set_contains (struct hash_table *ht, const char *s)
1171 {
1172   return hash_table_contains (ht, s);
1173 }
1174
1175 /* Convert the specified string set to array.  ARRAY should be large
1176    enough to hold hash_table_count(ht) char pointers.  */
1177
1178 void string_set_to_array (struct hash_table *ht, char **array)
1179 {
1180   hash_table_iterator iter;
1181   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1182     *array++ = iter.key;
1183 }
1184
1185 /* Free the string set.  This frees both the storage allocated for
1186    keys and the actual hash table.  (hash_table_destroy would only
1187    destroy the hash table.)  */
1188
1189 void
1190 string_set_free (struct hash_table *ht)
1191 {
1192   hash_table_iterator iter;
1193   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1194     xfree (iter.key);
1195   hash_table_destroy (ht);
1196 }
1197
1198 /* Utility function: simply call xfree() on all keys and values of HT.  */
1199
1200 void
1201 free_keys_and_values (struct hash_table *ht)
1202 {
1203   hash_table_iterator iter;
1204   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1205     {
1206       xfree (iter.key);
1207       xfree (iter.value);
1208     }
1209 }
1210 \f
1211 /* Get digit grouping data for thousand separors by calling
1212    localeconv().  The data includes separator string and grouping info
1213    and is cached after the first call to the function.
1214
1215    In locales that don't set a thousand separator (such as the "C"
1216    locale), this forces it to be ",".  We are now only showing
1217    thousand separators in one place, so this shouldn't be a problem in
1218    practice.  */
1219
1220 static void
1221 get_grouping_data (const char **sep, const char **grouping)
1222 {
1223   static const char *cached_sep;
1224   static const char *cached_grouping;
1225   static bool initialized;
1226   if (!initialized)
1227     {
1228       /* Get the grouping info from the locale. */
1229       struct lconv *lconv = localeconv ();
1230       cached_sep = lconv->thousands_sep;
1231       cached_grouping = lconv->grouping;
1232       if (!*cached_sep)
1233         {
1234           /* Many locales (such as "C" or "hr_HR") don't specify
1235              grouping, which we still want to use it for legibility.
1236              In those locales set the sep char to ',', unless that
1237              character is used for decimal point, in which case set it
1238              to ".".  */
1239           if (*lconv->decimal_point != ',')
1240             cached_sep = ",";
1241           else
1242             cached_sep = ".";
1243           cached_grouping = "\x03";
1244         }
1245       initialized = true;
1246     }
1247   *sep = cached_sep;
1248   *grouping = cached_grouping;
1249 }
1250
1251 /* Return a printed representation of N with thousand separators.
1252    This should respect locale settings, with the exception of the "C"
1253    locale which mandates no separator, but we use one anyway.
1254
1255    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1256    the separators because it's too non-portable, and it's hard to test
1257    for this feature at configure time.  Besides, it wouldn't display
1258    separators in the "C" locale, still used by many Unix users.  */
1259
1260 const char *
1261 with_thousand_seps (wgint n)
1262 {
1263   static char outbuf[48];
1264   char *p = outbuf + sizeof outbuf;
1265
1266   /* Info received from locale */
1267   const char *grouping, *sep;
1268   int seplen;
1269
1270   /* State information */
1271   int i = 0, groupsize;
1272   const char *atgroup;
1273
1274   bool negative = n < 0;
1275
1276   /* Initialize grouping data. */
1277   get_grouping_data (&sep, &grouping);
1278   seplen = strlen (sep);
1279   atgroup = grouping;
1280   groupsize = *atgroup++;
1281
1282   /* This would overflow on WGINT_MIN, but printing negative numbers
1283      is not an important goal of this fuinction.  */
1284   if (negative)
1285     n = -n;
1286
1287   /* Write the number into the buffer, backwards, inserting the
1288      separators as necessary.  */
1289   *--p = '\0';
1290   while (1)
1291     {
1292       *--p = n % 10 + '0';
1293       n /= 10;
1294       if (n == 0)
1295         break;
1296       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1297       if (++i == groupsize)
1298         {
1299           if (seplen == 1)
1300             *--p = *sep;
1301           else
1302             memcpy (p -= seplen, sep, seplen);
1303           i = 0;
1304           if (*atgroup)
1305             groupsize = *atgroup++;
1306         }
1307     }
1308   if (negative)
1309     *--p = '-';
1310
1311   return p;
1312 }
1313
1314 /* N, a byte quantity, is converted to a human-readable abberviated
1315    form a la sizes printed by `ls -lh'.  The result is written to a
1316    static buffer, a pointer to which is returned.
1317
1318    Unlike `with_thousand_seps', this approximates to the nearest unit.
1319    Quoting GNU libit: "Most people visually process strings of 3-4
1320    digits effectively, but longer strings of digits are more prone to
1321    misinterpretation.  Hence, converting to an abbreviated form
1322    usually improves readability."
1323
1324    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1325    original computer-related meaning of "powers of 1024".  We don't
1326    use the "*bibyte" names invented in 1998, and seldom used in
1327    practice.  Wikipedia's entry on "binary prefix" discusses this in
1328    some detail.  */
1329
1330 char *
1331 human_readable (HR_NUMTYPE n)
1332 {
1333   /* These suffixes are compatible with those of GNU `ls -lh'. */
1334   static char powers[] =
1335     {
1336       'K',                      /* kilobyte, 2^10 bytes */
1337       'M',                      /* megabyte, 2^20 bytes */
1338       'G',                      /* gigabyte, 2^30 bytes */
1339       'T',                      /* terabyte, 2^40 bytes */
1340       'P',                      /* petabyte, 2^50 bytes */
1341       'E',                      /* exabyte,  2^60 bytes */
1342     };
1343   static char buf[8];
1344   int i;
1345
1346   /* If the quantity is smaller than 1K, just print it. */
1347   if (n < 1024)
1348     {
1349       snprintf (buf, sizeof (buf), "%d", (int) n);
1350       return buf;
1351     }
1352
1353   /* Loop over powers, dividing N with 1024 in each iteration.  This
1354      works unchanged for all sizes of wgint, while still avoiding
1355      non-portable `long double' arithmetic.  */
1356   for (i = 0; i < countof (powers); i++)
1357     {
1358       /* At each iteration N is greater than the *subsequent* power.
1359          That way N/1024.0 produces a decimal number in the units of
1360          *this* power.  */
1361       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1362         {
1363           double val = n / 1024.0;
1364           /* Print values smaller than 10 with one decimal digits, and
1365              others without any decimals.  */
1366           snprintf (buf, sizeof (buf), "%.*f%c",
1367                     val < 10 ? 1 : 0, val, powers[i]);
1368           return buf;
1369         }
1370       n /= 1024;
1371     }
1372   return NULL;                  /* unreached */
1373 }
1374
1375 /* Count the digits in the provided number.  Used to allocate space
1376    when printing numbers.  */
1377
1378 int
1379 numdigit (wgint number)
1380 {
1381   int cnt = 1;
1382   if (number < 0)
1383     ++cnt;                      /* accomodate '-' */
1384   while ((number /= 10) != 0)
1385     ++cnt;
1386   return cnt;
1387 }
1388
1389 #define PR(mask) *p++ = n / (mask) + '0'
1390
1391 /* DIGITS_<D> is used to print a D-digit number and should be called
1392    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1393    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1394    Recursively this continues until DIGITS_1 is invoked.  */
1395
1396 #define DIGITS_1(mask) PR (mask)
1397 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1398 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1399 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1400 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1401 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1402 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1403 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1404 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1405 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1406
1407 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1408
1409 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1410 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1411 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1412 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1413 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1414 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1415 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1416 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1417 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1418
1419 /* Shorthand for casting to wgint. */
1420 #define W wgint
1421
1422 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1423    `sprintf(buffer, "%lld", (long long) number)', only typically much
1424    faster and portable to machines without long long.
1425
1426    The speedup may make a difference in programs that frequently
1427    convert numbers to strings.  Some implementations of sprintf,
1428    particularly the one in some versions of GNU libc, have been known
1429    to be quite slow when converting integers to strings.
1430
1431    Return the pointer to the location where the terminating zero was
1432    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1433    function is done.)
1434
1435    BUFFER should be large enough to accept as many bytes as you expect
1436    the number to take up.  On machines with 64-bit wgints the maximum
1437    needed size is 24 bytes.  That includes the digits needed for the
1438    largest 64-bit number, the `-' sign in case it's negative, and the
1439    terminating '\0'.  */
1440
1441 char *
1442 number_to_string (char *buffer, wgint number)
1443 {
1444   char *p = buffer;
1445   wgint n = number;
1446
1447   int last_digit_char = 0;
1448
1449 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1450   /* We are running in a very strange environment.  Leave the correct
1451      printing to sprintf.  */
1452   p += sprintf (buf, "%j", (intmax_t) (n));
1453 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1454
1455   if (n < 0)
1456     {
1457       if (n < -WGINT_MAX)
1458         {
1459           /* n = -n would overflow because -n would evaluate to a
1460              wgint value larger than WGINT_MAX.  Need to make n
1461              smaller and handle the last digit separately.  */
1462           int last_digit = n % 10;
1463           /* The sign of n%10 is implementation-defined. */
1464           if (last_digit < 0)
1465             last_digit_char = '0' - last_digit;
1466           else
1467             last_digit_char = '0' + last_digit;
1468           /* After n is made smaller, -n will not overflow. */
1469           n /= 10;
1470         }
1471
1472       *p++ = '-';
1473       n = -n;
1474     }
1475
1476   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1477      way printing any N is fully open-coded without a loop or jump.
1478      (Also see description of DIGITS_*.)  */
1479
1480   if      (n < 10)                       DIGITS_1 (1);
1481   else if (n < 100)                      DIGITS_2 (10);
1482   else if (n < 1000)                     DIGITS_3 (100);
1483   else if (n < 10000)                    DIGITS_4 (1000);
1484   else if (n < 100000)                   DIGITS_5 (10000);
1485   else if (n < 1000000)                  DIGITS_6 (100000);
1486   else if (n < 10000000)                 DIGITS_7 (1000000);
1487   else if (n < 100000000)                DIGITS_8 (10000000);
1488   else if (n < 1000000000)               DIGITS_9 (100000000);
1489 #if SIZEOF_WGINT == 4
1490   /* wgint is 32 bits wide: no number has more than 10 digits. */
1491   else                                   DIGITS_10 (1000000000);
1492 #else
1493   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1494      Constants are constructed by compile-time multiplication to avoid
1495      dealing with different notations for 64-bit constants
1496      (nL/nLL/nI64, depending on the compiler and architecture).  */
1497   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1498   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1499   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1500   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1501   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1502   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1503   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1504   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1505   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1506   else                                   DIGITS_19 (1000000000*(W)1000000000);
1507 #endif
1508
1509   if (last_digit_char)
1510     *p++ = last_digit_char;
1511
1512   *p = '\0';
1513 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1514
1515   return p;
1516 }
1517
1518 #undef PR
1519 #undef W
1520 #undef SPRINTF_WGINT
1521 #undef DIGITS_1
1522 #undef DIGITS_2
1523 #undef DIGITS_3
1524 #undef DIGITS_4
1525 #undef DIGITS_5
1526 #undef DIGITS_6
1527 #undef DIGITS_7
1528 #undef DIGITS_8
1529 #undef DIGITS_9
1530 #undef DIGITS_10
1531 #undef DIGITS_11
1532 #undef DIGITS_12
1533 #undef DIGITS_13
1534 #undef DIGITS_14
1535 #undef DIGITS_15
1536 #undef DIGITS_16
1537 #undef DIGITS_17
1538 #undef DIGITS_18
1539 #undef DIGITS_19
1540
1541 #define RING_SIZE 3
1542
1543 /* Print NUMBER to a statically allocated string and return a pointer
1544    to the printed representation.
1545
1546    This function is intended to be used in conjunction with printf.
1547    It is hard to portably print wgint values:
1548     a) you cannot use printf("%ld", number) because wgint can be long
1549        long on 32-bit machines with LFS.
1550     b) you cannot use printf("%lld", number) because NUMBER could be
1551        long on 32-bit machines without LFS, or on 64-bit machines,
1552        which do not require LFS.  Also, Windows doesn't support %lld.
1553     c) you cannot use printf("%j", (int_max_t) number) because not all
1554        versions of printf support "%j", the most notable being the one
1555        on Windows.
1556     d) you cannot #define WGINT_FMT to the appropriate format and use
1557        printf(WGINT_FMT, number) because that would break translations
1558        for user-visible messages, such as printf("Downloaded: %d
1559        bytes\n", number).
1560
1561    What you should use instead is printf("%s", number_to_static_string
1562    (number)).
1563
1564    CAVEAT: since the function returns pointers to static data, you
1565    must be careful to copy its result before calling it again.
1566    However, to make it more useful with printf, the function maintains
1567    an internal ring of static buffers to return.  That way things like
1568    printf("%s %s", number_to_static_string (num1),
1569    number_to_static_string (num2)) work as expected.  Three buffers
1570    are currently used, which means that "%s %s %s" will work, but "%s
1571    %s %s %s" won't.  If you need to print more than three wgints,
1572    bump the RING_SIZE (or rethink your message.)  */
1573
1574 char *
1575 number_to_static_string (wgint number)
1576 {
1577   static char ring[RING_SIZE][24];
1578   static int ringpos;
1579   char *buf = ring[ringpos];
1580   number_to_string (buf, number);
1581   ringpos = (ringpos + 1) % RING_SIZE;
1582   return buf;
1583 }
1584 \f
1585 /* Determine the width of the terminal we're running on.  If that's
1586    not possible, return 0.  */
1587
1588 int
1589 determine_screen_width (void)
1590 {
1591   /* If there's a way to get the terminal size using POSIX
1592      tcgetattr(), somebody please tell me.  */
1593 #ifdef TIOCGWINSZ
1594   int fd;
1595   struct winsize wsz;
1596
1597   if (opt.lfilename != NULL)
1598     return 0;
1599
1600   fd = fileno (stderr);
1601   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1602     return 0;                   /* most likely ENOTTY */
1603
1604   return wsz.ws_col;
1605 #elif defined(WINDOWS)
1606   CONSOLE_SCREEN_BUFFER_INFO csbi;
1607   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1608     return 0;
1609   return csbi.dwSize.X;
1610 #else  /* neither TIOCGWINSZ nor WINDOWS */
1611   return 0;
1612 #endif /* neither TIOCGWINSZ nor WINDOWS */
1613 }
1614 \f
1615 /* Whether the rnd system (either rand or [dl]rand48) has been
1616    seeded.  */
1617 static int rnd_seeded;
1618
1619 /* Return a random number between 0 and MAX-1, inclusive.
1620
1621    If the system does not support lrand48 and MAX is greater than the
1622    value of RAND_MAX+1 on the system, the returned value will be in
1623    the range [0, RAND_MAX].  This may be fixed in a future release.
1624    The random number generator is seeded automatically the first time
1625    it is called.
1626
1627    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1628    for cryptography.  It is only meant to be used in situations where
1629    quality of the random numbers returned doesn't really matter.  */
1630
1631 int
1632 random_number (int max)
1633 {
1634 #ifdef HAVE_DRAND48
1635   if (!rnd_seeded)
1636     {
1637       srand48 ((long) time (NULL) ^ (long) getpid ());
1638       rnd_seeded = 1;
1639     }
1640   return lrand48 () % max;
1641 #else  /* not HAVE_DRAND48 */
1642
1643   double bounded;
1644   int rnd;
1645   if (!rnd_seeded)
1646     {
1647       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1648       rnd_seeded = 1;
1649     }
1650   rnd = rand ();
1651
1652   /* Like rand() % max, but uses the high-order bits for better
1653      randomness on architectures where rand() is implemented using a
1654      simple congruential generator.  */
1655
1656   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1657   return (int) bounded;
1658
1659 #endif /* not HAVE_DRAND48 */
1660 }
1661
1662 /* Return a random uniformly distributed floating point number in the
1663    [0, 1) range.  Uses drand48 where available, and a really lame
1664    kludge elsewhere.  */
1665
1666 double
1667 random_float (void)
1668 {
1669 #ifdef HAVE_DRAND48
1670   if (!rnd_seeded)
1671     {
1672       srand48 ((long) time (NULL) ^ (long) getpid ());
1673       rnd_seeded = 1;
1674     }
1675   return drand48 ();
1676 #else  /* not HAVE_DRAND48 */
1677   return (  random_number (10000) / 10000.0
1678           + random_number (10000) / (10000.0 * 10000.0)
1679           + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1680           + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1681 #endif /* not HAVE_DRAND48 */
1682 }
1683 \f
1684 /* Implementation of run_with_timeout, a generic timeout-forcing
1685    routine for systems with Unix-like signal handling.  */
1686
1687 #ifdef USE_SIGNAL_TIMEOUT
1688 # ifdef HAVE_SIGSETJMP
1689 #  define SETJMP(env) sigsetjmp (env, 1)
1690
1691 static sigjmp_buf run_with_timeout_env;
1692
1693 static void
1694 abort_run_with_timeout (int sig)
1695 {
1696   assert (sig == SIGALRM);
1697   siglongjmp (run_with_timeout_env, -1);
1698 }
1699 # else /* not HAVE_SIGSETJMP */
1700 #  define SETJMP(env) setjmp (env)
1701
1702 static jmp_buf run_with_timeout_env;
1703
1704 static void
1705 abort_run_with_timeout (int sig)
1706 {
1707   assert (sig == SIGALRM);
1708   /* We don't have siglongjmp to preserve the set of blocked signals;
1709      if we longjumped out of the handler at this point, SIGALRM would
1710      remain blocked.  We must unblock it manually. */
1711   int mask = siggetmask ();
1712   mask &= ~sigmask (SIGALRM);
1713   sigsetmask (mask);
1714
1715   /* Now it's safe to longjump. */
1716   longjmp (run_with_timeout_env, -1);
1717 }
1718 # endif /* not HAVE_SIGSETJMP */
1719
1720 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1721    setitimer where available, alarm otherwise.
1722
1723    TIMEOUT should be non-zero.  If the timeout value is so small that
1724    it would be rounded to zero, it is rounded to the least legal value
1725    instead (1us for setitimer, 1s for alarm).  That ensures that
1726    SIGALRM will be delivered in all cases.  */
1727
1728 static void
1729 alarm_set (double timeout)
1730 {
1731 #ifdef ITIMER_REAL
1732   /* Use the modern itimer interface. */
1733   struct itimerval itv;
1734   xzero (itv);
1735   itv.it_value.tv_sec = (long) timeout;
1736   itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1737   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1738     /* Ensure that we wait for at least the minimum interval.
1739        Specifying zero would mean "wait forever".  */
1740     itv.it_value.tv_usec = 1;
1741   setitimer (ITIMER_REAL, &itv, NULL);
1742 #else  /* not ITIMER_REAL */
1743   /* Use the old alarm() interface. */
1744   int secs = (int) timeout;
1745   if (secs == 0)
1746     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1747        because alarm(0) means "never deliver the alarm", i.e. "wait
1748        forever", which is not what someone who specifies a 0.5s
1749        timeout would expect.  */
1750     secs = 1;
1751   alarm (secs);
1752 #endif /* not ITIMER_REAL */
1753 }
1754
1755 /* Cancel the alarm set with alarm_set. */
1756
1757 static void
1758 alarm_cancel (void)
1759 {
1760 #ifdef ITIMER_REAL
1761   struct itimerval disable;
1762   xzero (disable);
1763   setitimer (ITIMER_REAL, &disable, NULL);
1764 #else  /* not ITIMER_REAL */
1765   alarm (0);
1766 #endif /* not ITIMER_REAL */
1767 }
1768
1769 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1770    seconds.  Returns true if the function was interrupted with a
1771    timeout, false otherwise.
1772
1773    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1774    using setitimer() or alarm().  The timeout is enforced by
1775    longjumping out of the SIGALRM handler.  This has several
1776    advantages compared to the traditional approach of relying on
1777    signals causing system calls to exit with EINTR:
1778
1779      * The callback function is *forcibly* interrupted after the
1780        timeout expires, (almost) regardless of what it was doing and
1781        whether it was in a syscall.  For example, a calculation that
1782        takes a long time is interrupted as reliably as an IO
1783        operation.
1784
1785      * It works with both SYSV and BSD signals because it doesn't
1786        depend on the default setting of SA_RESTART.
1787
1788      * It doesn't require special handler setup beyond a simple call
1789        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1790        optional.)
1791
1792    The only downside is that, if FUN allocates internal resources that
1793    are normally freed prior to exit from the functions, they will be
1794    lost in case of timeout.  */
1795
1796 bool
1797 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1798 {
1799   int saved_errno;
1800
1801   if (timeout == 0)
1802     {
1803       fun (arg);
1804       return false;
1805     }
1806
1807   signal (SIGALRM, abort_run_with_timeout);
1808   if (SETJMP (run_with_timeout_env) != 0)
1809     {
1810       /* Longjumped out of FUN with a timeout. */
1811       signal (SIGALRM, SIG_DFL);
1812       return true;
1813     }
1814   alarm_set (timeout);
1815   fun (arg);
1816
1817   /* Preserve errno in case alarm() or signal() modifies it. */
1818   saved_errno = errno;
1819   alarm_cancel ();
1820   signal (SIGALRM, SIG_DFL);
1821   errno = saved_errno;
1822
1823   return false;
1824 }
1825
1826 #else  /* not USE_SIGNAL_TIMEOUT */
1827
1828 #ifndef WINDOWS
1829 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
1830    define it under Windows, because Windows has its own version of
1831    run_with_timeout that uses threads.  */
1832
1833 int
1834 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1835 {
1836   fun (arg);
1837   return false;
1838 }
1839 #endif /* not WINDOWS */
1840 #endif /* not USE_SIGNAL_TIMEOUT */
1841 \f
1842 #ifndef WINDOWS
1843
1844 /* Sleep the specified amount of seconds.  On machines without
1845    nanosleep(), this may sleep shorter if interrupted by signals.  */
1846
1847 void
1848 xsleep (double seconds)
1849 {
1850 #ifdef HAVE_NANOSLEEP
1851   /* nanosleep is the preferred interface because it offers high
1852      accuracy and, more importantly, because it allows us to reliably
1853      restart receiving a signal such as SIGWINCH.  (There was an
1854      actual Debian bug report about --limit-rate malfunctioning while
1855      the terminal was being resized.)  */
1856   struct timespec sleep, remaining;
1857   sleep.tv_sec = (long) seconds;
1858   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1859   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1860     /* If nanosleep has been interrupted by a signal, adjust the
1861        sleeping period and return to sleep.  */
1862     sleep = remaining;
1863 #elif defined(HAVE_USLEEP)
1864   /* If usleep is available, use it in preference to select.  */
1865   if (seconds >= 1)
1866     {
1867       /* On some systems, usleep cannot handle values larger than
1868          1,000,000.  If the period is larger than that, use sleep
1869          first, then add usleep for subsecond accuracy.  */
1870       sleep (seconds);
1871       seconds -= (long) seconds;
1872     }
1873   usleep (seconds * 1000000);
1874 #else /* fall back select */
1875   /* Note that, although Windows supports select, it can't be used to
1876      implement sleeping because Winsock's select doesn't implement
1877      timeout when it is passed NULL pointers for all fd sets.  (But it
1878      does under Cygwin, which implements Unix-compatible select.)  */
1879   struct timeval sleep;
1880   sleep.tv_sec = (long) seconds;
1881   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
1882   select (0, NULL, NULL, NULL, &sleep);
1883   /* If select returns -1 and errno is EINTR, it means we were
1884      interrupted by a signal.  But without knowing how long we've
1885      actually slept, we can't return to sleep.  Using gettimeofday to
1886      track sleeps is slow and unreliable due to clock skew.  */
1887 #endif
1888 }
1889
1890 #endif /* not WINDOWS */
1891
1892 /* Encode the octets in DATA of length LENGTH to base64 format,
1893    storing the result to DEST.  The output will be zero-terminated,
1894    and must point to a writable buffer of at least
1895    1+BASE64_LENGTH(length) bytes.  The function returns the length of
1896    the resulting base64 data, not counting the terminating zero.
1897
1898    This implementation does not emit newlines after 76 characters of
1899    base64 data.  */
1900
1901 int
1902 base64_encode (const void *data, int length, char *dest)
1903 {
1904   /* Conversion table.  */
1905   static const char tbl[64] = {
1906     'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
1907     'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
1908     'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
1909     'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
1910   };
1911   /* Access bytes in DATA as unsigned char, otherwise the shifts below
1912      don't work for data with MSB set. */
1913   const unsigned char *s = data;
1914   /* Theoretical ANSI violation when length < 3. */
1915   const unsigned char *end = (const unsigned char *) data + length - 2;
1916   char *p = dest;
1917
1918   /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
1919   for (; s < end; s += 3)
1920     {
1921       *p++ = tbl[s[0] >> 2];
1922       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1923       *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
1924       *p++ = tbl[s[2] & 0x3f];
1925     }
1926
1927   /* Pad the result if necessary...  */
1928   switch (length % 3)
1929     {
1930     case 1:
1931       *p++ = tbl[s[0] >> 2];
1932       *p++ = tbl[(s[0] & 3) << 4];
1933       *p++ = '=';
1934       *p++ = '=';
1935       break;
1936     case 2:
1937       *p++ = tbl[s[0] >> 2];
1938       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1939       *p++ = tbl[((s[1] & 0xf) << 2)];
1940       *p++ = '=';
1941       break;
1942     }
1943   /* ...and zero-terminate it.  */
1944   *p = '\0';
1945
1946   return p - dest;
1947 }
1948
1949 /* Store in C the next non-whitespace character from the string, or \0
1950    when end of string is reached.  */
1951 #define NEXT_CHAR(c, p) do {                    \
1952   c = (unsigned char) *p++;                     \
1953 } while (ISSPACE (c))
1954
1955 #define IS_ASCII(c) (((c) & 0x80) == 0)
1956
1957 /* Decode data from BASE64 (a null-terminated string) into memory
1958    pointed to by DEST.  DEST is assumed to be large enough to
1959    accomodate the decoded data, which is guaranteed to be no more than
1960    3/4*strlen(base64).
1961
1962    Since DEST is assumed to contain binary data, it is not
1963    NUL-terminated.  The function returns the length of the data
1964    written to TO.  -1 is returned in case of error caused by malformed
1965    base64 input.
1966
1967    This function originates from Free Recode.  */
1968
1969 int
1970 base64_decode (const char *base64, void *dest)
1971 {
1972   /* Table of base64 values for first 128 characters.  Note that this
1973      assumes ASCII (but so does Wget in other places).  */
1974   static const signed char base64_char_to_value[128] =
1975     {
1976       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
1977       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
1978       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
1979       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
1980       -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
1981       54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
1982       -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
1983       5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
1984       15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
1985       25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
1986       29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
1987       39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
1988       49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
1989     };
1990 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
1991 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
1992
1993   const char *p = base64;
1994   char *q = dest;
1995
1996   while (1)
1997     {
1998       unsigned char c;
1999       unsigned long value;
2000
2001       /* Process first byte of a quadruplet.  */
2002       NEXT_CHAR (c, p);
2003       if (!c)
2004         break;
2005       if (c == '=' || !IS_BASE64 (c))
2006         return -1;              /* illegal char while decoding base64 */
2007       value = BASE64_CHAR_TO_VALUE (c) << 18;
2008
2009       /* Process second byte of a quadruplet.  */
2010       NEXT_CHAR (c, p);
2011       if (!c)
2012         return -1;              /* premature EOF while decoding base64 */
2013       if (c == '=' || !IS_BASE64 (c))
2014         return -1;              /* illegal char while decoding base64 */
2015       value |= BASE64_CHAR_TO_VALUE (c) << 12;
2016       *q++ = value >> 16;
2017
2018       /* Process third byte of a quadruplet.  */
2019       NEXT_CHAR (c, p);
2020       if (!c)
2021         return -1;              /* premature EOF while decoding base64 */
2022       if (!IS_BASE64 (c))
2023         return -1;              /* illegal char while decoding base64 */
2024
2025       if (c == '=')
2026         {
2027           NEXT_CHAR (c, p);
2028           if (!c)
2029             return -1;          /* premature EOF while decoding base64 */
2030           if (c != '=')
2031             return -1;          /* padding `=' expected but not found */
2032           continue;
2033         }
2034
2035       value |= BASE64_CHAR_TO_VALUE (c) << 6;
2036       *q++ = 0xff & value >> 8;
2037
2038       /* Process fourth byte of a quadruplet.  */
2039       NEXT_CHAR (c, p);
2040       if (!c)
2041         return -1;              /* premature EOF while decoding base64 */
2042       if (c == '=')
2043         continue;
2044       if (!IS_BASE64 (c))
2045         return -1;              /* illegal char while decoding base64 */
2046
2047       value |= BASE64_CHAR_TO_VALUE (c);
2048       *q++ = 0xff & value;
2049     }
2050 #undef IS_BASE64
2051 #undef BASE64_CHAR_TO_VALUE
2052
2053   return q - (char *) dest;
2054 }
2055
2056 #undef IS_ASCII
2057 #undef NEXT_CHAR
2058 \f
2059 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2060    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2061
2062 static void
2063 mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2064                     int (*cmpfun) (const void *, const void *))
2065 {
2066 #define ELT(array, pos) ((char *)(array) + (pos) * size)
2067   if (from < to)
2068     {
2069       size_t i, j, k;
2070       size_t mid = (to + from) / 2;
2071       mergesort_internal (base, temp, size, from, mid, cmpfun);
2072       mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2073       i = from;
2074       j = mid + 1;
2075       for (k = from; (i <= mid) && (j <= to); k++)
2076         if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2077           memcpy (ELT (temp, k), ELT (base, i++), size);
2078         else
2079           memcpy (ELT (temp, k), ELT (base, j++), size);
2080       while (i <= mid)
2081         memcpy (ELT (temp, k++), ELT (base, i++), size);
2082       while (j <= to)
2083         memcpy (ELT (temp, k++), ELT (base, j++), size);
2084       for (k = from; k <= to; k++)
2085         memcpy (ELT (base, k), ELT (temp, k), size);
2086     }
2087 #undef ELT
2088 }
2089
2090 /* Stable sort with interface exactly like standard library's qsort.
2091    Uses mergesort internally, allocating temporary storage with
2092    alloca.  */
2093
2094 void
2095 stable_sort (void *base, size_t nmemb, size_t size,
2096              int (*cmpfun) (const void *, const void *))
2097 {
2098   if (size > 1)
2099     {
2100       void *temp = alloca (nmemb * size * sizeof (void *));
2101       mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2102     }
2103 }
2104 \f
2105 /* Print a decimal number.  If it is equal to or larger than ten, the
2106    number is rounded.  Otherwise it is printed with one significant
2107    digit without trailing zeros and with no more than three fractional
2108    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2109    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2110
2111    This is useful for displaying durations because it provides
2112    order-of-magnitude information without unnecessary clutter --
2113    long-running downloads are shown without the fractional part, and
2114    short ones still retain one significant digit.  */
2115
2116 const char *
2117 print_decimal (double number)
2118 {
2119   static char buf[32];
2120   double n = number >= 0 ? number : -number;
2121
2122   if (n >= 9.95)
2123     /* Cut off at 9.95 because the below %.1f would round 9.96 to
2124        "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2125     snprintf (buf, sizeof buf, "%.0f", number);
2126   else if (n >= 0.95)
2127     snprintf (buf, sizeof buf, "%.1f", number);
2128   else if (n >= 0.001)
2129     snprintf (buf, sizeof buf, "%.1g", number);
2130   else if (n >= 0.0005)
2131     /* round [0.0005, 0.001) to 0.001 */
2132     snprintf (buf, sizeof buf, "%.3f", number);
2133   else
2134     /* print numbers close to 0 as 0, not 0.000 */
2135     strcpy (buf, "0");
2136
2137   return buf;
2138 }
2139
2140 #ifdef TESTING
2141
2142 const char *
2143 test_subdir_p()
2144 {
2145   int i;
2146   struct {
2147     char *d1;
2148     char *d2;
2149     bool result;
2150   } test_array[] = {
2151     { "/somedir", "/somedir", true },
2152     { "/somedir", "/somedir/d2", true },
2153     { "/somedir/d1", "/somedir", false },
2154   };
2155
2156   for (i = 0; i < countof(test_array); ++i)
2157     {
2158       bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2159
2160       mu_assert ("test_subdir_p: wrong result",
2161                  res == test_array[i].result);
2162     }
2163
2164   return NULL;
2165 }
2166
2167 const char *
2168 test_dir_matches_p()
2169 {
2170   int i;
2171   struct {
2172     char *dirlist[3];
2173     char *dir;
2174     bool result;
2175   } test_array[] = {
2176     { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2177     { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2178     { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2179     { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2180     { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2181   };
2182
2183   for (i = 0; i < countof(test_array); ++i)
2184     {
2185       bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2186
2187       mu_assert ("test_dir_matches_p: wrong result",
2188                  res == test_array[i].result);
2189     }
2190
2191   return NULL;
2192 }
2193
2194 #endif /* TESTING */
2195