sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996-2006 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3 of the License, or
   9 (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  18
  19 In addition, as a special exception, the Free Software Foundation
  20 gives permission to link the code of its release of Wget with the
  21 OpenSSL project's "OpenSSL" library (or with modified versions of it
  22 that use the same license as the "OpenSSL" library), and distribute
  23 the linked executables.  You must obey the GNU General Public License
  24 in all respects for all of the code used other than "OpenSSL".  If you
  25 modify this file, you may extend this exception to your version of the
  26 file, but you are not obligated to do so.  If you do not wish to do
  27 so, delete this exception statement from your version.  */
  28
  29 #include <config.h>
  30
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <time.h>
  35 #ifdef HAVE_SYS_TIME_H
  36 # include <sys/time.h>
  37 #endif
  38 #ifdef HAVE_UNISTD_H
  39 # include <unistd.h>
  40 #endif
  41 #ifdef HAVE_MMAP
  42 # include <sys/mman.h>
  43 #endif
  44 #ifdef HAVE_UTIME_H
  45 # include <utime.h>
  46 #endif
  47 #ifdef HAVE_SYS_UTIME_H
  48 # include <sys/utime.h>
  49 #endif
  50 #include <errno.h>
  51 #include <fcntl.h>
  52 #include <assert.h>
  53 #include <stdarg.h>
  54 #include <locale.h>
  55
  56 /* For TIOCGWINSZ and friends: */
  57 #ifdef HAVE_SYS_IOCTL_H
  58 # include <sys/ioctl.h>
  59 #endif
  60 #ifdef HAVE_TERMIOS_H
  61 # include <termios.h>
  62 #endif
  63
  64 /* Needed for Unix version of run_with_timeout. */
  65 #include <signal.h>
  66 #include <setjmp.h>
  67
  68 #ifndef HAVE_SIGSETJMP
  69 /* If sigsetjmp is a macro, configure won't pick it up. */
  70 # ifdef sigsetjmp
  71 #  define HAVE_SIGSETJMP
  72 # endif
  73 #endif
  74
  75 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  76 # define USE_SIGNAL_TIMEOUT
  77 #endif
  78
  79 #include "wget.h"
  80 #include "utils.h"
  81 #include "hash.h"
  82
  83 #ifdef TESTING
  84 #include "test.h"
  85 #endif
  86
  87 /* Utility function: like xstrdup(), but also lowercases S.  */
  88
  89 char *
  90 xstrdup_lower (const char *s)
  91 {
  92   char *copy = xstrdup (s);
  93   char *p = copy;
  94   for (; *p; p++)
  95     *p = TOLOWER (*p);
  96   return copy;
  97 }
  98
  99 /* Copy the string formed by two pointers (one on the beginning, other
 100    on the char after the last char) to a new, malloc-ed location.
 101    0-terminate it.  */
 102 char *
 103 strdupdelim (const char *beg, const char *end)
 104 {
 105   char *res = xmalloc (end - beg + 1);
 106   memcpy (res, beg, end - beg);
 107   res[end - beg] = '\0';
 108   return res;
 109 }
 110
 111 /* Parse a string containing comma-separated elements, and return a
 112    vector of char pointers with the elements.  Spaces following the
 113    commas are ignored.  */
 114 char **
 115 sepstring (const char *s)
 116 {
 117   char **res;
 118   const char *p;
 119   int i = 0;
 120
 121   if (!s || !*s)
 122     return NULL;
 123   res = NULL;
 124   p = s;
 125   while (*s)
 126     {
 127       if (*s == ',')
 128         {
 129           res = xrealloc (res, (i + 2) * sizeof (char *));
 130           res[i] = strdupdelim (p, s);
 131           res[++i] = NULL;
 132           ++s;
 133           /* Skip the blanks following the ','.  */
 134           while (ISSPACE (*s))
 135             ++s;
 136           p = s;
 137         }
 138       else
 139         ++s;
 140     }
 141   res = xrealloc (res, (i + 2) * sizeof (char *));
 142   res[i] = strdupdelim (p, s);
 143   res[i + 1] = NULL;
 144   return res;
 145 }
 146 \f
 147 /* Like sprintf, but prints into a string of sufficient size freshly
 148    allocated with malloc, which is returned.  If unable to print due
 149    to invalid format, returns NULL.  Inability to allocate needed
 150    memory results in abort, as with xmalloc.  This is in spirit
 151    similar to the GNU/BSD extension asprintf, but somewhat easier to
 152    use.
 153
 154    Internally the function either calls vasprintf or loops around
 155    vsnprintf until the correct size is found.  Since Wget also ships a
 156    fallback implementation of vsnprintf, this should be portable.  */
 157
 158 char *
 159 aprintf (const char *fmt, ...)
 160 {
 161 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 162   /* Use vasprintf. */
 163   int ret;
 164   va_list args;
 165   char *str;
 166   va_start (args, fmt);
 167   ret = vasprintf (&str, fmt, args);
 168   va_end (args);
 169   if (ret < 0 && errno == ENOMEM)
 170     abort ();                   /* for consistency with xmalloc/xrealloc */
 171   else if (ret < 0)
 172     return NULL;
 173   return str;
 174 #else  /* not HAVE_VASPRINTF */
 175
 176   /* vasprintf is unavailable.  snprintf into a small buffer and
 177      resize it as necessary. */
 178   int size = 32;
 179   char *str = xmalloc (size);
 180
 181   /* #### This code will infloop and eventually abort in xrealloc if
 182      passed a FMT that causes snprintf to consistently return -1.  */
 183
 184   while (1)
 185     {
 186       int n;
 187       va_list args;
 188
 189       va_start (args, fmt);
 190       n = vsnprintf (str, size, fmt, args);
 191       va_end (args);
 192
 193       /* If the printing worked, return the string. */
 194       if (n > -1 && n < size)
 195         return str;
 196
 197       /* Else try again with a larger buffer. */
 198       if (n > -1)               /* C99 */
 199         size = n + 1;           /* precisely what is needed */
 200       else
 201         size <<= 1;             /* twice the old size */
 202       str = xrealloc (str, size);
 203     }
 204 #endif /* not HAVE_VASPRINTF */
 205 }
 206
 207 /* Concatenate the NULL-terminated list of string arguments into
 208    freshly allocated space.  */
 209
 210 char *
 211 concat_strings (const char *str0, ...)
 212 {
 213   va_list args;
 214   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 215   char *ret, *p;
 216
 217   const char *next_str;
 218   int total_length = 0;
 219   int argcount;
 220
 221   /* Calculate the length of and allocate the resulting string. */
 222
 223   argcount = 0;
 224   va_start (args, str0);
 225   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 226     {
 227       int len = strlen (next_str);
 228       if (argcount < countof (saved_lengths))
 229         saved_lengths[argcount++] = len;
 230       total_length += len;
 231     }
 232   va_end (args);
 233   p = ret = xmalloc (total_length + 1);
 234
 235   /* Copy the strings into the allocated space. */
 236
 237   argcount = 0;
 238   va_start (args, str0);
 239   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 240     {
 241       int len;
 242       if (argcount < countof (saved_lengths))
 243         len = saved_lengths[argcount++];
 244       else
 245         len = strlen (next_str);
 246       memcpy (p, next_str, len);
 247       p += len;
 248     }
 249   va_end (args);
 250   *p = '\0';
 251
 252   return ret;
 253 }
 254 \f
 255 /* Format the provided time according to the specified format.  The
 256    format is a string with format elements supported by strftime.  */
 257
 258 static char *
 259 fmttime (time_t t, const char *fmt)
 260 {
 261   static char output[32];
 262   struct tm *tm = localtime(&t);
 263   if (!tm)
 264     abort ();
 265   if (!strftime(output, sizeof(output), fmt, tm))
 266     abort ();
 267   return output;
 268 }
 269
 270 /* Return pointer to a static char[] buffer in which zero-terminated
 271    string-representation of TM (in form hh:mm:ss) is printed.
 272
 273    If TM is NULL, the current time will be used.  */
 274
 275 char *
 276 time_str (time_t t)
 277 {
 278   return fmttime(t, "%H:%M:%S");
 279 }
 280
 281 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 282
 283 char *
 284 datetime_str (time_t t)
 285 {
 286   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 287 }
 288 \f
 289 /* The Windows versions of the following two functions are defined in
 290    mswindows.c.  */
 291
 292 #ifndef WINDOWS
 293 void
 294 fork_to_background (void)
 295 {
 296   pid_t pid;
 297   /* Whether we arrange our own version of opt.lfilename here.  */
 298   bool logfile_changed = false;
 299
 300   if (!opt.lfilename)
 301     {
 302       /* We must create the file immediately to avoid either a race
 303          condition (which arises from using unique_name and failing to
 304          use fopen_excl) or lying to the user about the log file name
 305          (which arises from using unique_name, printing the name, and
 306          using fopen_excl later on.)  */
 307       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 308       if (new_log_fp)
 309         {
 310           logfile_changed = true;
 311           fclose (new_log_fp);
 312         }
 313     }
 314   pid = fork ();
 315   if (pid < 0)
 316     {
 317       /* parent, error */
 318       perror ("fork");
 319       exit (1);
 320     }
 321   else if (pid != 0)
 322     {
 323       /* parent, no error */
 324       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 325       if (logfile_changed)
 326         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 327       exit (0);                 /* #### should we use _exit()? */
 328     }
 329
 330   /* child: give up the privileges and keep running. */
 331   setsid ();
 332   freopen ("/dev/null", "r", stdin);
 333   freopen ("/dev/null", "w", stdout);
 334   freopen ("/dev/null", "w", stderr);
 335 }
 336 #endif /* not WINDOWS */
 337 \f
 338 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 339    specified with TM.  The atime ("access time") is set to the current
 340    time.  */
 341
 342 void
 343 touch (const char *file, time_t tm)
 344 {
 345 #ifdef HAVE_STRUCT_UTIMBUF
 346   struct utimbuf times;
 347 #else
 348   struct {
 349     time_t actime;
 350     time_t modtime;
 351   } times;
 352 #endif
 353   times.modtime = tm;
 354   times.actime = time (NULL);
 355   if (utime (file, &times) == -1)
 356     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 357 }
 358
 359 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 360    nothing under MS-Windows.  */
 361 int
 362 remove_link (const char *file)
 363 {
 364   int err = 0;
 365   struct_stat st;
 366
 367   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 368     {
 369       DEBUGP (("Unlinking %s (symlink).\n", file));
 370       err = unlink (file);
 371       if (err != 0)
 372         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 373                    file, strerror (errno));
 374     }
 375   return err;
 376 }
 377
 378 /* Does FILENAME exist?  This is quite a lousy implementation, since
 379    it supplies no error codes -- only a yes-or-no answer.  Thus it
 380    will return that a file does not exist if, e.g., the directory is
 381    unreadable.  I don't mind it too much currently, though.  The
 382    proper way should, of course, be to have a third, error state,
 383    other than true/false, but that would introduce uncalled-for
 384    additional complexity to the callers.  */
 385 bool
 386 file_exists_p (const char *filename)
 387 {
 388 #ifdef HAVE_ACCESS
 389   return access (filename, F_OK) >= 0;
 390 #else
 391   struct_stat buf;
 392   return stat (filename, &buf) >= 0;
 393 #endif
 394 }
 395
 396 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 397    Returns 0 on error.  */
 398 bool
 399 file_non_directory_p (const char *path)
 400 {
 401   struct_stat buf;
 402   /* Use lstat() rather than stat() so that symbolic links pointing to
 403      directories can be identified correctly.  */
 404   if (lstat (path, &buf) != 0)
 405     return false;
 406   return S_ISDIR (buf.st_mode) ? false : true;
 407 }
 408
 409 /* Return the size of file named by FILENAME, or -1 if it cannot be
 410    opened or seeked into. */
 411 wgint
 412 file_size (const char *filename)
 413 {
 414 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 415   wgint size;
 416   /* We use fseek rather than stat to determine the file size because
 417      that way we can also verify that the file is readable without
 418      explicitly checking for permissions.  Inspired by the POST patch
 419      by Arnaud Wylie.  */
 420   FILE *fp = fopen (filename, "rb");
 421   if (!fp)
 422     return -1;
 423   fseeko (fp, 0, SEEK_END);
 424   size = ftello (fp);
 425   fclose (fp);
 426   return size;
 427 #else
 428   struct_stat st;
 429   if (stat (filename, &st) < 0)
 430     return -1;
 431   return st.st_size;
 432 #endif
 433 }
 434
 435 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 436    doesn't exist is found.  Return a freshly allocated copy of the
 437    unused file name.  */
 438
 439 static char *
 440 unique_name_1 (const char *prefix)
 441 {
 442   int count = 1;
 443   int plen = strlen (prefix);
 444   char *template = (char *)alloca (plen + 1 + 24);
 445   char *template_tail = template + plen;
 446
 447   memcpy (template, prefix, plen);
 448   *template_tail++ = '.';
 449
 450   do
 451     number_to_string (template_tail, count++);
 452   while (file_exists_p (template));
 453
 454   return xstrdup (template);
 455 }
 456
 457 /* Return a unique file name, based on FILE.
 458
 459    More precisely, if FILE doesn't exist, it is returned unmodified.
 460    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 461    file name that doesn't exist is returned.
 462
 463    The resulting file is not created, only verified that it didn't
 464    exist at the point in time when the function was called.
 465    Therefore, where security matters, don't rely that the file created
 466    by this function exists until you open it with O_EXCL or
 467    equivalent.
 468
 469    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 470    string.  Otherwise, it may return FILE if the file doesn't exist
 471    (and therefore doesn't need changing).  */
 472
 473 char *
 474 unique_name (const char *file, bool allow_passthrough)
 475 {
 476   /* If the FILE itself doesn't exist, return it without
 477      modification. */
 478   if (!file_exists_p (file))
 479     return allow_passthrough ? (char *)file : xstrdup (file);
 480
 481   /* Otherwise, find a numeric suffix that results in unused file name
 482      and return it.  */
 483   return unique_name_1 (file);
 484 }
 485
 486 /* Create a file based on NAME, except without overwriting an existing
 487    file with that name.  Providing O_EXCL is correctly implemented,
 488    this function does not have the race condition associated with
 489    opening the file returned by unique_name.  */
 490
 491 FILE *
 492 unique_create (const char *name, bool binary, char **opened_name)
 493 {
 494   /* unique file name, based on NAME */
 495   char *uname = unique_name (name, false);
 496   FILE *fp;
 497   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 498     {
 499       xfree (uname);
 500       uname = unique_name (name, false);
 501     }
 502   if (opened_name && fp != NULL)
 503     {
 504       if (fp)
 505         *opened_name = uname;
 506       else
 507         {
 508           *opened_name = NULL;
 509           xfree (uname);
 510         }
 511     }
 512   else
 513     xfree (uname);
 514   return fp;
 515 }
 516
 517 /* Open the file for writing, with the addition that the file is
 518    opened "exclusively".  This means that, if the file already exists,
 519    this function will *fail* and errno will be set to EEXIST.  If
 520    BINARY is set, the file will be opened in binary mode, equivalent
 521    to fopen's "wb".
 522
 523    If opening the file fails for any reason, including the file having
 524    previously existed, this function returns NULL and sets errno
 525    appropriately.  */
 526
 527 FILE *
 528 fopen_excl (const char *fname, bool binary)
 529 {
 530   int fd;
 531 #ifdef O_EXCL
 532   int flags = O_WRONLY | O_CREAT | O_EXCL;
 533 # ifdef O_BINARY
 534   if (binary)
 535     flags |= O_BINARY;
 536 # endif
 537   fd = open (fname, flags, 0666);
 538   if (fd < 0)
 539     return NULL;
 540   return fdopen (fd, binary ? "wb" : "w");
 541 #else  /* not O_EXCL */
 542   /* Manually check whether the file exists.  This is prone to race
 543      conditions, but systems without O_EXCL haven't deserved
 544      better.  */
 545   if (file_exists_p (fname))
 546     {
 547       errno = EEXIST;
 548       return NULL;
 549     }
 550   return fopen (fname, binary ? "wb" : "w");
 551 #endif /* not O_EXCL */
 552 }
 553 \f
 554 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 555    are missing, create them first.  In case any mkdir() call fails,
 556    return its error status.  Returns 0 on successful completion.
 557
 558    The behaviour of this function should be identical to the behaviour
 559    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 560 int
 561 make_directory (const char *directory)
 562 {
 563   int i, ret, quit = 0;
 564   char *dir;
 565
 566   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 567      function is unsafe if called with a read-only char *argument.  */
 568   STRDUP_ALLOCA (dir, directory);
 569
 570   /* If the first character of dir is '/', skip it (and thus enable
 571      creation of absolute-pathname directories.  */
 572   for (i = (*dir == '/'); 1; ++i)
 573     {
 574       for (; dir[i] && dir[i] != '/'; i++)
 575         ;
 576       if (!dir[i])
 577         quit = 1;
 578       dir[i] = '\0';
 579       /* Check whether the directory already exists.  Allow creation of
 580          of intermediate directories to fail, as the initial path components
 581          are not necessarily directories!  */
 582       if (!file_exists_p (dir))
 583         ret = mkdir (dir, 0777);
 584       else
 585         ret = 0;
 586       if (quit)
 587         break;
 588       else
 589         dir[i] = '/';
 590     }
 591   return ret;
 592 }
 593
 594 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 595    should be a file name.
 596
 597    file_merge("/foo/bar", "baz")  => "/foo/baz"
 598    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 599    file_merge("foo", "bar")       => "bar"
 600
 601    In other words, it's a simpler and gentler version of uri_merge.  */
 602
 603 char *
 604 file_merge (const char *base, const char *file)
 605 {
 606   char *result;
 607   const char *cut = (const char *)strrchr (base, '/');
 608
 609   if (!cut)
 610     return xstrdup (file);
 611
 612   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 613   memcpy (result, base, cut - base);
 614   result[cut - base] = '/';
 615   strcpy (result + (cut - base) + 1, file);
 616
 617   return result;
 618 }
 619 \f
 620 /* Like fnmatch, but performs a case-insensitive match.  */
 621
 622 int
 623 fnmatch_nocase (const char *pattern, const char *string, int flags)
 624 {
 625 #ifdef FNM_CASEFOLD
 626   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 627      also present on *BSD platforms, and possibly elsewhere.  */
 628   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 629 #else
 630   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 631   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 632   char *strcopy = (char *) alloca (strlen (string) + 1);
 633   char *p;
 634   for (p = patcopy; *pattern; pattern++, p++)
 635     *p = TOLOWER (*pattern);
 636   *p = '\0';
 637   for (p = strcopy; *string; string++, p++)
 638     *p = TOLOWER (*string);
 639   *p = '\0';
 640   return fnmatch (patcopy, strcopy, flags);
 641 #endif
 642 }
 643
 644 static bool in_acclist (const char *const *, const char *, bool);
 645
 646 /* Determine whether a file is acceptable to be followed, according to
 647    lists of patterns to accept/reject.  */
 648 bool
 649 acceptable (const char *s)
 650 {
 651   int l = strlen (s);
 652
 653   while (l && s[l] != '/')
 654     --l;
 655   if (s[l] == '/')
 656     s += (l + 1);
 657   if (opt.accepts)
 658     {
 659       if (opt.rejects)
 660         return (in_acclist ((const char *const *)opt.accepts, s, true)
 661                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 662       else
 663         return in_acclist ((const char *const *)opt.accepts, s, true);
 664     }
 665   else if (opt.rejects)
 666     return !in_acclist ((const char *const *)opt.rejects, s, true);
 667   return true;
 668 }
 669
 670 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 671    will return true if and only if D2 begins with `/something/' or is exactly
 672    '/something'.  */
 673 bool
 674 subdir_p (const char *d1, const char *d2)
 675 {
 676   if (!opt.ignore_case)
 677     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 678       ;
 679   else
 680     for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
 681       ;
 682
 683   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 684 }
 685
 686 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 687    first element that matches DIR, through wildcards or front comparison (as
 688    appropriate).  */
 689 static bool
 690 dir_matches_p (char **dirlist, const char *dir)
 691 {
 692   char **x;
 693   int (*matcher) (const char *, const char *, int)
 694     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 695
 696   for (x = dirlist; *x; x++)
 697     {
 698       /* Remove leading '/' */
 699       char *p = *x + (**x == '/');
 700       if (has_wildcards_p (p))
 701         {
 702           if (matcher (p, dir, FNM_PATHNAME) == 0)
 703             break;
 704         }
 705       else
 706         {
 707           if (subdir_p (p, dir))
 708             break;
 709         }
 710     }
 711
 712   return *x ? true : false;
 713 }
 714
 715 /* Returns whether DIRECTORY is acceptable for download, wrt the
 716    include/exclude lists.
 717
 718    The leading `/' is ignored in paths; relative and absolute paths
 719    may be freely intermixed.  */
 720
 721 bool
 722 accdir (const char *directory)
 723 {
 724   /* Remove starting '/'.  */
 725   if (*directory == '/')
 726     ++directory;
 727   if (opt.includes)
 728     {
 729       if (!dir_matches_p (opt.includes, directory))
 730         return false;
 731     }
 732   if (opt.excludes)
 733     {
 734       if (dir_matches_p (opt.excludes, directory))
 735         return false;
 736     }
 737   return true;
 738 }
 739
 740 /* Return true if STRING ends with TAIL.  For instance:
 741
 742    match_tail ("abc", "bc", false)  -> 1
 743    match_tail ("abc", "ab", false)  -> 0
 744    match_tail ("abc", "abc", false) -> 1
 745
 746    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 747
 748 bool
 749 match_tail (const char *string, const char *tail, bool fold_case)
 750 {
 751   int i, j;
 752
 753   /* We want this to be fast, so we code two loops, one with
 754      case-folding, one without. */
 755
 756   if (!fold_case)
 757     {
 758       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 759         if (string[i] != tail[j])
 760           break;
 761     }
 762   else
 763     {
 764       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 765         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 766           break;
 767     }
 768
 769   /* If the tail was exhausted, the match was succesful.  */
 770   if (j == -1)
 771     return true;
 772   else
 773     return false;
 774 }
 775
 776 /* Checks whether string S matches each element of ACCEPTS.  A list
 777    element are matched either with fnmatch() or match_tail(),
 778    according to whether the element contains wildcards or not.
 779
 780    If the BACKWARD is false, don't do backward comparison -- just compare
 781    them normally.  */
 782 static bool
 783 in_acclist (const char *const *accepts, const char *s, bool backward)
 784 {
 785   for (; *accepts; accepts++)
 786     {
 787       if (has_wildcards_p (*accepts))
 788         {
 789           int res = opt.ignore_case
 790             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 791           /* fnmatch returns 0 if the pattern *does* match the string.  */
 792           if (res == 0)
 793             return true;
 794         }
 795       else
 796         {
 797           if (backward)
 798             {
 799               if (match_tail (s, *accepts, opt.ignore_case))
 800                 return true;
 801             }
 802           else
 803             {
 804               int cmp = opt.ignore_case
 805                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 806               if (cmp == 0)
 807                 return true;
 808             }
 809         }
 810     }
 811   return false;
 812 }
 813
 814 /* Return the location of STR's suffix (file extension).  Examples:
 815    suffix ("foo.bar")       -> "bar"
 816    suffix ("foo.bar.baz")   -> "baz"
 817    suffix ("/foo/bar")      -> NULL
 818    suffix ("/foo.bar/baz")  -> NULL  */
 819 char *
 820 suffix (const char *str)
 821 {
 822   int i;
 823
 824   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 825     ;
 826
 827   if (str[i++] == '.')
 828     return (char *)str + i;
 829   else
 830     return NULL;
 831 }
 832
 833 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 834    `]').  */
 835
 836 bool
 837 has_wildcards_p (const char *s)
 838 {
 839   for (; *s; s++)
 840     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 841       return true;
 842   return false;
 843 }
 844
 845 /* Return true if FNAME ends with a typical HTML suffix.  The
 846    following (case-insensitive) suffixes are presumed to be HTML
 847    files:
 848
 849      html
 850      htm
 851      ?html (`?' matches one character)
 852
 853    #### CAVEAT.  This is not necessarily a good indication that FNAME
 854    refers to a file that contains HTML!  */
 855 bool
 856 has_html_suffix_p (const char *fname)
 857 {
 858   char *suf;
 859
 860   if ((suf = suffix (fname)) == NULL)
 861     return false;
 862   if (!strcasecmp (suf, "html"))
 863     return true;
 864   if (!strcasecmp (suf, "htm"))
 865     return true;
 866   if (suf[0] && !strcasecmp (suf + 1, "html"))
 867     return true;
 868   return false;
 869 }
 870
 871 /* Read a line from FP and return the pointer to freshly allocated
 872    storage.  The storage space is obtained through malloc() and should
 873    be freed with free() when it is no longer needed.
 874
 875    The length of the line is not limited, except by available memory.
 876    The newline character at the end of line is retained.  The line is
 877    terminated with a zero character.
 878
 879    After end-of-file is encountered without anything being read, NULL
 880    is returned.  NULL is also returned on error.  To distinguish
 881    between these two cases, use the stdio function ferror().  */
 882
 883 char *
 884 read_whole_line (FILE *fp)
 885 {
 886   int length = 0;
 887   int bufsize = 82;
 888   char *line = xmalloc (bufsize);
 889
 890   while (fgets (line + length, bufsize - length, fp))
 891     {
 892       length += strlen (line + length);
 893       if (length == 0)
 894         /* Possible for example when reading from a binary file where
 895            a line begins with \0.  */
 896         continue;
 897
 898       if (line[length - 1] == '\n')
 899         break;
 900
 901       /* fgets() guarantees to read the whole line, or to use up the
 902          space we've given it.  We can double the buffer
 903          unconditionally.  */
 904       bufsize <<= 1;
 905       line = xrealloc (line, bufsize);
 906     }
 907   if (length == 0 || ferror (fp))
 908     {
 909       xfree (line);
 910       return NULL;
 911     }
 912   if (length + 1 < bufsize)
 913     /* Relieve the memory from our exponential greediness.  We say
 914        `length + 1' because the terminating \0 is not included in
 915        LENGTH.  We don't need to zero-terminate the string ourselves,
 916        though, because fgets() does that.  */
 917     line = xrealloc (line, length + 1);
 918   return line;
 919 }
 920 \f
 921 /* Read FILE into memory.  A pointer to `struct file_memory' are
 922    returned; use struct element `content' to access file contents, and
 923    the element `length' to know the file length.  `content' is *not*
 924    zero-terminated, and you should *not* read or write beyond the [0,
 925    length) range of characters.
 926
 927    After you are done with the file contents, call read_file_free to
 928    release the memory.
 929
 930    Depending on the operating system and the type of file that is
 931    being read, read_file() either mmap's the file into memory, or
 932    reads the file into the core using read().
 933
 934    If file is named "-", fileno(stdin) is used for reading instead.
 935    If you want to read from a real file named "-", use "./-" instead.  */
 936
 937 struct file_memory *
 938 read_file (const char *file)
 939 {
 940   int fd;
 941   struct file_memory *fm;
 942   long size;
 943   bool inhibit_close = false;
 944
 945   /* Some magic in the finest tradition of Perl and its kin: if FILE
 946      is "-", just use stdin.  */
 947   if (HYPHENP (file))
 948     {
 949       fd = fileno (stdin);
 950       inhibit_close = true;
 951       /* Note that we don't inhibit mmap() in this case.  If stdin is
 952          redirected from a regular file, mmap() will still work.  */
 953     }
 954   else
 955     fd = open (file, O_RDONLY);
 956   if (fd < 0)
 957     return NULL;
 958   fm = xnew (struct file_memory);
 959
 960 #ifdef HAVE_MMAP
 961   {
 962     struct_fstat buf;
 963     if (fstat (fd, &buf) < 0)
 964       goto mmap_lose;
 965     fm->length = buf.st_size;
 966     /* NOTE: As far as I know, the callers of this function never
 967        modify the file text.  Relying on this would enable us to
 968        specify PROT_READ and MAP_SHARED for a marginal gain in
 969        efficiency, but at some cost to generality.  */
 970     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 971                         MAP_PRIVATE, fd, 0);
 972     if (fm->content == (char *)MAP_FAILED)
 973       goto mmap_lose;
 974     if (!inhibit_close)
 975       close (fd);
 976
 977     fm->mmap_p = 1;
 978     return fm;
 979   }
 980
 981  mmap_lose:
 982   /* The most common reason why mmap() fails is that FD does not point
 983      to a plain file.  However, it's also possible that mmap() doesn't
 984      work for a particular type of file.  Therefore, whenever mmap()
 985      fails, we just fall back to the regular method.  */
 986 #endif /* HAVE_MMAP */
 987
 988   fm->length = 0;
 989   size = 512;                   /* number of bytes fm->contents can
 990                                    hold at any given time. */
 991   fm->content = xmalloc (size);
 992   while (1)
 993     {
 994       wgint nread;
 995       if (fm->length > size / 2)
 996         {
 997           /* #### I'm not sure whether the whole exponential-growth
 998              thing makes sense with kernel read.  On Linux at least,
 999              read() refuses to read more than 4K from a file at a
1000              single chunk anyway.  But other Unixes might optimize it
1001              better, and it doesn't *hurt* anything, so I'm leaving
1002              it.  */
1003
1004           /* Normally, we grow SIZE exponentially to make the number
1005              of calls to read() and realloc() logarithmic in relation
1006              to file size.  However, read() can read an amount of data
1007              smaller than requested, and it would be unreasonable to
1008              double SIZE every time *something* was read.  Therefore,
1009              we double SIZE only when the length exceeds half of the
1010              entire allocated size.  */
1011           size <<= 1;
1012           fm->content = xrealloc (fm->content, size);
1013         }
1014       nread = read (fd, fm->content + fm->length, size - fm->length);
1015       if (nread > 0)
1016         /* Successful read. */
1017         fm->length += nread;
1018       else if (nread < 0)
1019         /* Error. */
1020         goto lose;
1021       else
1022         /* EOF */
1023         break;
1024     }
1025   if (!inhibit_close)
1026     close (fd);
1027   if (size > fm->length && fm->length != 0)
1028     /* Due to exponential growth of fm->content, the allocated region
1029        might be much larger than what is actually needed.  */
1030     fm->content = xrealloc (fm->content, fm->length);
1031   fm->mmap_p = 0;
1032   return fm;
1033
1034  lose:
1035   if (!inhibit_close)
1036     close (fd);
1037   xfree (fm->content);
1038   xfree (fm);
1039   return NULL;
1040 }
1041
1042 /* Release the resources held by FM.  Specifically, this calls
1043    munmap() or xfree() on fm->content, depending whether mmap or
1044    malloc/read were used to read in the file.  It also frees the
1045    memory needed to hold the FM structure itself.  */
1046
1047 void
1048 read_file_free (struct file_memory *fm)
1049 {
1050 #ifdef HAVE_MMAP
1051   if (fm->mmap_p)
1052     {
1053       munmap (fm->content, fm->length);
1054     }
1055   else
1056 #endif
1057     {
1058       xfree (fm->content);
1059     }
1060   xfree (fm);
1061 }
1062 \f
1063 /* Free the pointers in a NULL-terminated vector of pointers, then
1064    free the pointer itself.  */
1065 void
1066 free_vec (char **vec)
1067 {
1068   if (vec)
1069     {
1070       char **p = vec;
1071       while (*p)
1072         xfree (*p++);
1073       xfree (vec);
1074     }
1075 }
1076
1077 /* Append vector V2 to vector V1.  The function frees V2 and
1078    reallocates V1 (thus you may not use the contents of neither
1079    pointer after the call).  If V1 is NULL, V2 is returned.  */
1080 char **
1081 merge_vecs (char **v1, char **v2)
1082 {
1083   int i, j;
1084
1085   if (!v1)
1086     return v2;
1087   if (!v2)
1088     return v1;
1089   if (!*v2)
1090     {
1091       /* To avoid j == 0 */
1092       xfree (v2);
1093       return v1;
1094     }
1095   /* Count v1.  */
1096   for (i = 0; v1[i]; i++)
1097     ;
1098   /* Count v2.  */
1099   for (j = 0; v2[j]; j++)
1100     ;
1101   /* Reallocate v1.  */
1102   v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1103   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1104   xfree (v2);
1105   return v1;
1106 }
1107
1108 /* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1109    is allocated as needed.  Return the new value of the vector. */
1110
1111 char **
1112 vec_append (char **vec, const char *str)
1113 {
1114   int cnt;                      /* count of vector elements, including
1115                                    the one we're about to append */
1116   if (vec != NULL)
1117     {
1118       for (cnt = 0; vec[cnt]; cnt++)
1119         ;
1120       ++cnt;
1121     }
1122   else
1123     cnt = 1;
1124   /* Reallocate the array to fit the new element and the NULL. */
1125   vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1126   /* Append a copy of STR to the vector. */
1127   vec[cnt - 1] = xstrdup (str);
1128   vec[cnt] = NULL;
1129   return vec;
1130 }
1131 \f
1132 /* Sometimes it's useful to create "sets" of strings, i.e. special
1133    hash tables where you want to store strings as keys and merely
1134    query for their existence.  Here is a set of utility routines that
1135    makes that transparent.  */
1136
1137 void
1138 string_set_add (struct hash_table *ht, const char *s)
1139 {
1140   /* First check whether the set element already exists.  If it does,
1141      do nothing so that we don't have to free() the old element and
1142      then strdup() a new one.  */
1143   if (hash_table_contains (ht, s))
1144     return;
1145
1146   /* We use "1" as value.  It provides us a useful and clear arbitrary
1147      value, and it consumes no memory -- the pointers to the same
1148      string "1" will be shared by all the key-value pairs in all `set'
1149      hash tables.  */
1150   hash_table_put (ht, xstrdup (s), "1");
1151 }
1152
1153 /* Synonym for hash_table_contains... */
1154
1155 int
1156 string_set_contains (struct hash_table *ht, const char *s)
1157 {
1158   return hash_table_contains (ht, s);
1159 }
1160
1161 /* Convert the specified string set to array.  ARRAY should be large
1162    enough to hold hash_table_count(ht) char pointers.  */
1163
1164 void string_set_to_array (struct hash_table *ht, char **array)
1165 {
1166   hash_table_iterator iter;
1167   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1168     *array++ = iter.key;
1169 }
1170
1171 /* Free the string set.  This frees both the storage allocated for
1172    keys and the actual hash table.  (hash_table_destroy would only
1173    destroy the hash table.)  */
1174
1175 void
1176 string_set_free (struct hash_table *ht)
1177 {
1178   hash_table_iterator iter;
1179   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1180     xfree (iter.key);
1181   hash_table_destroy (ht);
1182 }
1183
1184 /* Utility function: simply call xfree() on all keys and values of HT.  */
1185
1186 void
1187 free_keys_and_values (struct hash_table *ht)
1188 {
1189   hash_table_iterator iter;
1190   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1191     {
1192       xfree (iter.key);
1193       xfree (iter.value);
1194     }
1195 }
1196 \f
1197 /* Get digit grouping data for thousand separors by calling
1198    localeconv().  The data includes separator string and grouping info
1199    and is cached after the first call to the function.
1200
1201    In locales that don't set a thousand separator (such as the "C"
1202    locale), this forces it to be ",".  We are now only showing
1203    thousand separators in one place, so this shouldn't be a problem in
1204    practice.  */
1205
1206 static void
1207 get_grouping_data (const char **sep, const char **grouping)
1208 {
1209   static const char *cached_sep;
1210   static const char *cached_grouping;
1211   static bool initialized;
1212   if (!initialized)
1213     {
1214       /* Get the grouping info from the locale. */
1215       struct lconv *lconv = localeconv ();
1216       cached_sep = lconv->thousands_sep;
1217       cached_grouping = lconv->grouping;
1218       if (!*cached_sep)
1219         {
1220           /* Many locales (such as "C" or "hr_HR") don't specify
1221              grouping, which we still want to use it for legibility.
1222              In those locales set the sep char to ',', unless that
1223              character is used for decimal point, in which case set it
1224              to ".".  */
1225           if (*lconv->decimal_point != ',')
1226             cached_sep = ",";
1227           else
1228             cached_sep = ".";
1229           cached_grouping = "\x03";
1230         }
1231       initialized = true;
1232     }
1233   *sep = cached_sep;
1234   *grouping = cached_grouping;
1235 }
1236
1237 /* Return a printed representation of N with thousand separators.
1238    This should respect locale settings, with the exception of the "C"
1239    locale which mandates no separator, but we use one anyway.
1240
1241    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1242    the separators because it's too non-portable, and it's hard to test
1243    for this feature at configure time.  Besides, it wouldn't display
1244    separators in the "C" locale, still used by many Unix users.  */
1245
1246 const char *
1247 with_thousand_seps (wgint n)
1248 {
1249   static char outbuf[48];
1250   char *p = outbuf + sizeof outbuf;
1251
1252   /* Info received from locale */
1253   const char *grouping, *sep;
1254   int seplen;
1255
1256   /* State information */
1257   int i = 0, groupsize;
1258   const char *atgroup;
1259
1260   bool negative = n < 0;
1261
1262   /* Initialize grouping data. */
1263   get_grouping_data (&sep, &grouping);
1264   seplen = strlen (sep);
1265   atgroup = grouping;
1266   groupsize = *atgroup++;
1267
1268   /* This would overflow on WGINT_MIN, but printing negative numbers
1269      is not an important goal of this fuinction.  */
1270   if (negative)
1271     n = -n;
1272
1273   /* Write the number into the buffer, backwards, inserting the
1274      separators as necessary.  */
1275   *--p = '\0';
1276   while (1)
1277     {
1278       *--p = n % 10 + '0';
1279       n /= 10;
1280       if (n == 0)
1281         break;
1282       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1283       if (++i == groupsize)
1284         {
1285           if (seplen == 1)
1286             *--p = *sep;
1287           else
1288             memcpy (p -= seplen, sep, seplen);
1289           i = 0;
1290           if (*atgroup)
1291             groupsize = *atgroup++;
1292         }
1293     }
1294   if (negative)
1295     *--p = '-';
1296
1297   return p;
1298 }
1299
1300 /* N, a byte quantity, is converted to a human-readable abberviated
1301    form a la sizes printed by `ls -lh'.  The result is written to a
1302    static buffer, a pointer to which is returned.
1303
1304    Unlike `with_thousand_seps', this approximates to the nearest unit.
1305    Quoting GNU libit: "Most people visually process strings of 3-4
1306    digits effectively, but longer strings of digits are more prone to
1307    misinterpretation.  Hence, converting to an abbreviated form
1308    usually improves readability."
1309
1310    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1311    original computer-related meaning of "powers of 1024".  We don't
1312    use the "*bibyte" names invented in 1998, and seldom used in
1313    practice.  Wikipedia's entry on "binary prefix" discusses this in
1314    some detail.  */
1315
1316 char *
1317 human_readable (HR_NUMTYPE n)
1318 {
1319   /* These suffixes are compatible with those of GNU `ls -lh'. */
1320   static char powers[] =
1321     {
1322       'K',                      /* kilobyte, 2^10 bytes */
1323       'M',                      /* megabyte, 2^20 bytes */
1324       'G',                      /* gigabyte, 2^30 bytes */
1325       'T',                      /* terabyte, 2^40 bytes */
1326       'P',                      /* petabyte, 2^50 bytes */
1327       'E',                      /* exabyte,  2^60 bytes */
1328     };
1329   static char buf[8];
1330   int i;
1331
1332   /* If the quantity is smaller than 1K, just print it. */
1333   if (n < 1024)
1334     {
1335       snprintf (buf, sizeof (buf), "%d", (int) n);
1336       return buf;
1337     }
1338
1339   /* Loop over powers, dividing N with 1024 in each iteration.  This
1340      works unchanged for all sizes of wgint, while still avoiding
1341      non-portable `long double' arithmetic.  */
1342   for (i = 0; i < countof (powers); i++)
1343     {
1344       /* At each iteration N is greater than the *subsequent* power.
1345          That way N/1024.0 produces a decimal number in the units of
1346          *this* power.  */
1347       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1348         {
1349           double val = n / 1024.0;
1350           /* Print values smaller than 10 with one decimal digits, and
1351              others without any decimals.  */
1352           snprintf (buf, sizeof (buf), "%.*f%c",
1353                     val < 10 ? 1 : 0, val, powers[i]);
1354           return buf;
1355         }
1356       n /= 1024;
1357     }
1358   return NULL;                  /* unreached */
1359 }
1360
1361 /* Count the digits in the provided number.  Used to allocate space
1362    when printing numbers.  */
1363
1364 int
1365 numdigit (wgint number)
1366 {
1367   int cnt = 1;
1368   if (number < 0)
1369     ++cnt;                      /* accomodate '-' */
1370   while ((number /= 10) != 0)
1371     ++cnt;
1372   return cnt;
1373 }
1374
1375 #define PR(mask) *p++ = n / (mask) + '0'
1376
1377 /* DIGITS_<D> is used to print a D-digit number and should be called
1378    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1379    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1380    Recursively this continues until DIGITS_1 is invoked.  */
1381
1382 #define DIGITS_1(mask) PR (mask)
1383 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1384 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1385 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1386 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1387 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1388 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1389 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1390 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1391 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1392
1393 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1394
1395 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1396 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1397 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1398 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1399 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1400 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1401 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1402 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1403 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1404
1405 /* Shorthand for casting to wgint. */
1406 #define W wgint
1407
1408 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1409    `sprintf(buffer, "%lld", (long long) number)', only typically much
1410    faster and portable to machines without long long.
1411
1412    The speedup may make a difference in programs that frequently
1413    convert numbers to strings.  Some implementations of sprintf,
1414    particularly the one in some versions of GNU libc, have been known
1415    to be quite slow when converting integers to strings.
1416
1417    Return the pointer to the location where the terminating zero was
1418    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1419    function is done.)
1420
1421    BUFFER should be large enough to accept as many bytes as you expect
1422    the number to take up.  On machines with 64-bit wgints the maximum
1423    needed size is 24 bytes.  That includes the digits needed for the
1424    largest 64-bit number, the `-' sign in case it's negative, and the
1425    terminating '\0'.  */
1426
1427 char *
1428 number_to_string (char *buffer, wgint number)
1429 {
1430   char *p = buffer;
1431   wgint n = number;
1432
1433   int last_digit_char = 0;
1434
1435 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1436   /* We are running in a very strange environment.  Leave the correct
1437      printing to sprintf.  */
1438   p += sprintf (buf, "%j", (intmax_t) (n));
1439 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1440
1441   if (n < 0)
1442     {
1443       if (n < -WGINT_MAX)
1444         {
1445           /* n = -n would overflow because -n would evaluate to a
1446              wgint value larger than WGINT_MAX.  Need to make n
1447              smaller and handle the last digit separately.  */
1448           int last_digit = n % 10;
1449           /* The sign of n%10 is implementation-defined. */
1450           if (last_digit < 0)
1451             last_digit_char = '0' - last_digit;
1452           else
1453             last_digit_char = '0' + last_digit;
1454           /* After n is made smaller, -n will not overflow. */
1455           n /= 10;
1456         }
1457
1458       *p++ = '-';
1459       n = -n;
1460     }
1461
1462   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1463      way printing any N is fully open-coded without a loop or jump.
1464      (Also see description of DIGITS_*.)  */
1465
1466   if      (n < 10)                       DIGITS_1 (1);
1467   else if (n < 100)                      DIGITS_2 (10);
1468   else if (n < 1000)                     DIGITS_3 (100);
1469   else if (n < 10000)                    DIGITS_4 (1000);
1470   else if (n < 100000)                   DIGITS_5 (10000);
1471   else if (n < 1000000)                  DIGITS_6 (100000);
1472   else if (n < 10000000)                 DIGITS_7 (1000000);
1473   else if (n < 100000000)                DIGITS_8 (10000000);
1474   else if (n < 1000000000)               DIGITS_9 (100000000);
1475 #if SIZEOF_WGINT == 4
1476   /* wgint is 32 bits wide: no number has more than 10 digits. */
1477   else                                   DIGITS_10 (1000000000);
1478 #else
1479   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1480      Constants are constructed by compile-time multiplication to avoid
1481      dealing with different notations for 64-bit constants
1482      (nL/nLL/nI64, depending on the compiler and architecture).  */
1483   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1484   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1485   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1486   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1487   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1488   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1489   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1490   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1491   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1492   else                                   DIGITS_19 (1000000000*(W)1000000000);
1493 #endif
1494
1495   if (last_digit_char)
1496     *p++ = last_digit_char;
1497
1498   *p = '\0';
1499 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1500
1501   return p;
1502 }
1503
1504 #undef PR
1505 #undef W
1506 #undef SPRINTF_WGINT
1507 #undef DIGITS_1
1508 #undef DIGITS_2
1509 #undef DIGITS_3
1510 #undef DIGITS_4
1511 #undef DIGITS_5
1512 #undef DIGITS_6
1513 #undef DIGITS_7
1514 #undef DIGITS_8
1515 #undef DIGITS_9
1516 #undef DIGITS_10
1517 #undef DIGITS_11
1518 #undef DIGITS_12
1519 #undef DIGITS_13
1520 #undef DIGITS_14
1521 #undef DIGITS_15
1522 #undef DIGITS_16
1523 #undef DIGITS_17
1524 #undef DIGITS_18
1525 #undef DIGITS_19
1526
1527 #define RING_SIZE 3
1528
1529 /* Print NUMBER to a statically allocated string and return a pointer
1530    to the printed representation.
1531
1532    This function is intended to be used in conjunction with printf.
1533    It is hard to portably print wgint values:
1534     a) you cannot use printf("%ld", number) because wgint can be long
1535        long on 32-bit machines with LFS.
1536     b) you cannot use printf("%lld", number) because NUMBER could be
1537        long on 32-bit machines without LFS, or on 64-bit machines,
1538        which do not require LFS.  Also, Windows doesn't support %lld.
1539     c) you cannot use printf("%j", (int_max_t) number) because not all
1540        versions of printf support "%j", the most notable being the one
1541        on Windows.
1542     d) you cannot #define WGINT_FMT to the appropriate format and use
1543        printf(WGINT_FMT, number) because that would break translations
1544        for user-visible messages, such as printf("Downloaded: %d
1545        bytes\n", number).
1546
1547    What you should use instead is printf("%s", number_to_static_string
1548    (number)).
1549
1550    CAVEAT: since the function returns pointers to static data, you
1551    must be careful to copy its result before calling it again.
1552    However, to make it more useful with printf, the function maintains
1553    an internal ring of static buffers to return.  That way things like
1554    printf("%s %s", number_to_static_string (num1),
1555    number_to_static_string (num2)) work as expected.  Three buffers
1556    are currently used, which means that "%s %s %s" will work, but "%s
1557    %s %s %s" won't.  If you need to print more than three wgints,
1558    bump the RING_SIZE (or rethink your message.)  */
1559
1560 char *
1561 number_to_static_string (wgint number)
1562 {
1563   static char ring[RING_SIZE][24];
1564   static int ringpos;
1565   char *buf = ring[ringpos];
1566   number_to_string (buf, number);
1567   ringpos = (ringpos + 1) % RING_SIZE;
1568   return buf;
1569 }
1570 \f
1571 /* Determine the width of the terminal we're running on.  If that's
1572    not possible, return 0.  */
1573
1574 int
1575 determine_screen_width (void)
1576 {
1577   /* If there's a way to get the terminal size using POSIX
1578      tcgetattr(), somebody please tell me.  */
1579 #ifdef TIOCGWINSZ
1580   int fd;
1581   struct winsize wsz;
1582
1583   if (opt.lfilename != NULL)
1584     return 0;
1585
1586   fd = fileno (stderr);
1587   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1588     return 0;                   /* most likely ENOTTY */
1589
1590   return wsz.ws_col;
1591 #elif defined(WINDOWS)
1592   CONSOLE_SCREEN_BUFFER_INFO csbi;
1593   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1594     return 0;
1595   return csbi.dwSize.X;
1596 #else  /* neither TIOCGWINSZ nor WINDOWS */
1597   return 0;
1598 #endif /* neither TIOCGWINSZ nor WINDOWS */
1599 }
1600 \f
1601 /* Whether the rnd system (either rand or [dl]rand48) has been
1602    seeded.  */
1603 static int rnd_seeded;
1604
1605 /* Return a random number between 0 and MAX-1, inclusive.
1606
1607    If the system does not support lrand48 and MAX is greater than the
1608    value of RAND_MAX+1 on the system, the returned value will be in
1609    the range [0, RAND_MAX].  This may be fixed in a future release.
1610    The random number generator is seeded automatically the first time
1611    it is called.
1612
1613    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1614    for cryptography.  It is only meant to be used in situations where
1615    quality of the random numbers returned doesn't really matter.  */
1616
1617 int
1618 random_number (int max)
1619 {
1620 #ifdef HAVE_DRAND48
1621   if (!rnd_seeded)
1622     {
1623       srand48 ((long) time (NULL) ^ (long) getpid ());
1624       rnd_seeded = 1;
1625     }
1626   return lrand48 () % max;
1627 #else  /* not HAVE_DRAND48 */
1628
1629   double bounded;
1630   int rnd;
1631   if (!rnd_seeded)
1632     {
1633       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1634       rnd_seeded = 1;
1635     }
1636   rnd = rand ();
1637
1638   /* Like rand() % max, but uses the high-order bits for better
1639      randomness on architectures where rand() is implemented using a
1640      simple congruential generator.  */
1641
1642   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1643   return (int) bounded;
1644
1645 #endif /* not HAVE_DRAND48 */
1646 }
1647
1648 /* Return a random uniformly distributed floating point number in the
1649    [0, 1) range.  Uses drand48 where available, and a really lame
1650    kludge elsewhere.  */
1651
1652 double
1653 random_float (void)
1654 {
1655 #ifdef HAVE_DRAND48
1656   if (!rnd_seeded)
1657     {
1658       srand48 ((long) time (NULL) ^ (long) getpid ());
1659       rnd_seeded = 1;
1660     }
1661   return drand48 ();
1662 #else  /* not HAVE_DRAND48 */
1663   return (  random_number (10000) / 10000.0
1664           + random_number (10000) / (10000.0 * 10000.0)
1665           + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1666           + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1667 #endif /* not HAVE_DRAND48 */
1668 }
1669 \f
1670 /* Implementation of run_with_timeout, a generic timeout-forcing
1671    routine for systems with Unix-like signal handling.  */
1672
1673 #ifdef USE_SIGNAL_TIMEOUT
1674 # ifdef HAVE_SIGSETJMP
1675 #  define SETJMP(env) sigsetjmp (env, 1)
1676
1677 static sigjmp_buf run_with_timeout_env;
1678
1679 static void
1680 abort_run_with_timeout (int sig)
1681 {
1682   assert (sig == SIGALRM);
1683   siglongjmp (run_with_timeout_env, -1);
1684 }
1685 # else /* not HAVE_SIGSETJMP */
1686 #  define SETJMP(env) setjmp (env)
1687
1688 static jmp_buf run_with_timeout_env;
1689
1690 static void
1691 abort_run_with_timeout (int sig)
1692 {
1693   assert (sig == SIGALRM);
1694   /* We don't have siglongjmp to preserve the set of blocked signals;
1695      if we longjumped out of the handler at this point, SIGALRM would
1696      remain blocked.  We must unblock it manually. */
1697   int mask = siggetmask ();
1698   mask &= ~sigmask (SIGALRM);
1699   sigsetmask (mask);
1700
1701   /* Now it's safe to longjump. */
1702   longjmp (run_with_timeout_env, -1);
1703 }
1704 # endif /* not HAVE_SIGSETJMP */
1705
1706 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1707    setitimer where available, alarm otherwise.
1708
1709    TIMEOUT should be non-zero.  If the timeout value is so small that
1710    it would be rounded to zero, it is rounded to the least legal value
1711    instead (1us for setitimer, 1s for alarm).  That ensures that
1712    SIGALRM will be delivered in all cases.  */
1713
1714 static void
1715 alarm_set (double timeout)
1716 {
1717 #ifdef ITIMER_REAL
1718   /* Use the modern itimer interface. */
1719   struct itimerval itv;
1720   xzero (itv);
1721   itv.it_value.tv_sec = (long) timeout;
1722   itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1723   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1724     /* Ensure that we wait for at least the minimum interval.
1725        Specifying zero would mean "wait forever".  */
1726     itv.it_value.tv_usec = 1;
1727   setitimer (ITIMER_REAL, &itv, NULL);
1728 #else  /* not ITIMER_REAL */
1729   /* Use the old alarm() interface. */
1730   int secs = (int) timeout;
1731   if (secs == 0)
1732     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1733        because alarm(0) means "never deliver the alarm", i.e. "wait
1734        forever", which is not what someone who specifies a 0.5s
1735        timeout would expect.  */
1736     secs = 1;
1737   alarm (secs);
1738 #endif /* not ITIMER_REAL */
1739 }
1740
1741 /* Cancel the alarm set with alarm_set. */
1742
1743 static void
1744 alarm_cancel (void)
1745 {
1746 #ifdef ITIMER_REAL
1747   struct itimerval disable;
1748   xzero (disable);
1749   setitimer (ITIMER_REAL, &disable, NULL);
1750 #else  /* not ITIMER_REAL */
1751   alarm (0);
1752 #endif /* not ITIMER_REAL */
1753 }
1754
1755 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1756    seconds.  Returns true if the function was interrupted with a
1757    timeout, false otherwise.
1758
1759    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1760    using setitimer() or alarm().  The timeout is enforced by
1761    longjumping out of the SIGALRM handler.  This has several
1762    advantages compared to the traditional approach of relying on
1763    signals causing system calls to exit with EINTR:
1764
1765      * The callback function is *forcibly* interrupted after the
1766        timeout expires, (almost) regardless of what it was doing and
1767        whether it was in a syscall.  For example, a calculation that
1768        takes a long time is interrupted as reliably as an IO
1769        operation.
1770
1771      * It works with both SYSV and BSD signals because it doesn't
1772        depend on the default setting of SA_RESTART.
1773
1774      * It doesn't require special handler setup beyond a simple call
1775        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1776        optional.)
1777
1778    The only downside is that, if FUN allocates internal resources that
1779    are normally freed prior to exit from the functions, they will be
1780    lost in case of timeout.  */
1781
1782 bool
1783 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1784 {
1785   int saved_errno;
1786
1787   if (timeout == 0)
1788     {
1789       fun (arg);
1790       return false;
1791     }
1792
1793   signal (SIGALRM, abort_run_with_timeout);
1794   if (SETJMP (run_with_timeout_env) != 0)
1795     {
1796       /* Longjumped out of FUN with a timeout. */
1797       signal (SIGALRM, SIG_DFL);
1798       return true;
1799     }
1800   alarm_set (timeout);
1801   fun (arg);
1802
1803   /* Preserve errno in case alarm() or signal() modifies it. */
1804   saved_errno = errno;
1805   alarm_cancel ();
1806   signal (SIGALRM, SIG_DFL);
1807   errno = saved_errno;
1808
1809   return false;
1810 }
1811
1812 #else  /* not USE_SIGNAL_TIMEOUT */
1813
1814 #ifndef WINDOWS
1815 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
1816    define it under Windows, because Windows has its own version of
1817    run_with_timeout that uses threads.  */
1818
1819 bool
1820 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1821 {
1822   fun (arg);
1823   return false;
1824 }
1825 #endif /* not WINDOWS */
1826 #endif /* not USE_SIGNAL_TIMEOUT */
1827 \f
1828 #ifndef WINDOWS
1829
1830 /* Sleep the specified amount of seconds.  On machines without
1831    nanosleep(), this may sleep shorter if interrupted by signals.  */
1832
1833 void
1834 xsleep (double seconds)
1835 {
1836 #ifdef HAVE_NANOSLEEP
1837   /* nanosleep is the preferred interface because it offers high
1838      accuracy and, more importantly, because it allows us to reliably
1839      restart receiving a signal such as SIGWINCH.  (There was an
1840      actual Debian bug report about --limit-rate malfunctioning while
1841      the terminal was being resized.)  */
1842   struct timespec sleep, remaining;
1843   sleep.tv_sec = (long) seconds;
1844   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1845   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1846     /* If nanosleep has been interrupted by a signal, adjust the
1847        sleeping period and return to sleep.  */
1848     sleep = remaining;
1849 #elif defined(HAVE_USLEEP)
1850   /* If usleep is available, use it in preference to select.  */
1851   if (seconds >= 1)
1852     {
1853       /* On some systems, usleep cannot handle values larger than
1854          1,000,000.  If the period is larger than that, use sleep
1855          first, then add usleep for subsecond accuracy.  */
1856       sleep (seconds);
1857       seconds -= (long) seconds;
1858     }
1859   usleep (seconds * 1000000);
1860 #else /* fall back select */
1861   /* Note that, although Windows supports select, it can't be used to
1862      implement sleeping because Winsock's select doesn't implement
1863      timeout when it is passed NULL pointers for all fd sets.  (But it
1864      does under Cygwin, which implements Unix-compatible select.)  */
1865   struct timeval sleep;
1866   sleep.tv_sec = (long) seconds;
1867   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
1868   select (0, NULL, NULL, NULL, &sleep);
1869   /* If select returns -1 and errno is EINTR, it means we were
1870      interrupted by a signal.  But without knowing how long we've
1871      actually slept, we can't return to sleep.  Using gettimeofday to
1872      track sleeps is slow and unreliable due to clock skew.  */
1873 #endif
1874 }
1875
1876 #endif /* not WINDOWS */
1877
1878 /* Encode the octets in DATA of length LENGTH to base64 format,
1879    storing the result to DEST.  The output will be zero-terminated,
1880    and must point to a writable buffer of at least
1881    1+BASE64_LENGTH(length) bytes.  The function returns the length of
1882    the resulting base64 data, not counting the terminating zero.
1883
1884    This implementation does not emit newlines after 76 characters of
1885    base64 data.  */
1886
1887 int
1888 base64_encode (const void *data, int length, char *dest)
1889 {
1890   /* Conversion table.  */
1891   static const char tbl[64] = {
1892     'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
1893     'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
1894     'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
1895     'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
1896   };
1897   /* Access bytes in DATA as unsigned char, otherwise the shifts below
1898      don't work for data with MSB set. */
1899   const unsigned char *s = data;
1900   /* Theoretical ANSI violation when length < 3. */
1901   const unsigned char *end = (const unsigned char *) data + length - 2;
1902   char *p = dest;
1903
1904   /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
1905   for (; s < end; s += 3)
1906     {
1907       *p++ = tbl[s[0] >> 2];
1908       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1909       *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
1910       *p++ = tbl[s[2] & 0x3f];
1911     }
1912
1913   /* Pad the result if necessary...  */
1914   switch (length % 3)
1915     {
1916     case 1:
1917       *p++ = tbl[s[0] >> 2];
1918       *p++ = tbl[(s[0] & 3) << 4];
1919       *p++ = '=';
1920       *p++ = '=';
1921       break;
1922     case 2:
1923       *p++ = tbl[s[0] >> 2];
1924       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1925       *p++ = tbl[((s[1] & 0xf) << 2)];
1926       *p++ = '=';
1927       break;
1928     }
1929   /* ...and zero-terminate it.  */
1930   *p = '\0';
1931
1932   return p - dest;
1933 }
1934
1935 /* Store in C the next non-whitespace character from the string, or \0
1936    when end of string is reached.  */
1937 #define NEXT_CHAR(c, p) do {                    \
1938   c = (unsigned char) *p++;                     \
1939 } while (ISSPACE (c))
1940
1941 #define IS_ASCII(c) (((c) & 0x80) == 0)
1942
1943 /* Decode data from BASE64 (a null-terminated string) into memory
1944    pointed to by DEST.  DEST is assumed to be large enough to
1945    accomodate the decoded data, which is guaranteed to be no more than
1946    3/4*strlen(base64).
1947
1948    Since DEST is assumed to contain binary data, it is not
1949    NUL-terminated.  The function returns the length of the data
1950    written to TO.  -1 is returned in case of error caused by malformed
1951    base64 input.
1952
1953    This function originates from Free Recode.  */
1954
1955 int
1956 base64_decode (const char *base64, void *dest)
1957 {
1958   /* Table of base64 values for first 128 characters.  Note that this
1959      assumes ASCII (but so does Wget in other places).  */
1960   static const signed char base64_char_to_value[128] =
1961     {
1962       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
1963       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
1964       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
1965       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
1966       -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
1967       54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
1968       -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
1969       5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
1970       15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
1971       25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
1972       29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
1973       39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
1974       49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
1975     };
1976 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
1977 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
1978
1979   const char *p = base64;
1980   char *q = dest;
1981
1982   while (1)
1983     {
1984       unsigned char c;
1985       unsigned long value;
1986
1987       /* Process first byte of a quadruplet.  */
1988       NEXT_CHAR (c, p);
1989       if (!c)
1990         break;
1991       if (c == '=' || !IS_BASE64 (c))
1992         return -1;              /* illegal char while decoding base64 */
1993       value = BASE64_CHAR_TO_VALUE (c) << 18;
1994
1995       /* Process second byte of a quadruplet.  */
1996       NEXT_CHAR (c, p);
1997       if (!c)
1998         return -1;              /* premature EOF while decoding base64 */
1999       if (c == '=' || !IS_BASE64 (c))
2000         return -1;              /* illegal char while decoding base64 */
2001       value |= BASE64_CHAR_TO_VALUE (c) << 12;
2002       *q++ = value >> 16;
2003
2004       /* Process third byte of a quadruplet.  */
2005       NEXT_CHAR (c, p);
2006       if (!c)
2007         return -1;              /* premature EOF while decoding base64 */
2008       if (!IS_BASE64 (c))
2009         return -1;              /* illegal char while decoding base64 */
2010
2011       if (c == '=')
2012         {
2013           NEXT_CHAR (c, p);
2014           if (!c)
2015             return -1;          /* premature EOF while decoding base64 */
2016           if (c != '=')
2017             return -1;          /* padding `=' expected but not found */
2018           continue;
2019         }
2020
2021       value |= BASE64_CHAR_TO_VALUE (c) << 6;
2022       *q++ = 0xff & value >> 8;
2023
2024       /* Process fourth byte of a quadruplet.  */
2025       NEXT_CHAR (c, p);
2026       if (!c)
2027         return -1;              /* premature EOF while decoding base64 */
2028       if (c == '=')
2029         continue;
2030       if (!IS_BASE64 (c))
2031         return -1;              /* illegal char while decoding base64 */
2032
2033       value |= BASE64_CHAR_TO_VALUE (c);
2034       *q++ = 0xff & value;
2035     }
2036 #undef IS_BASE64
2037 #undef BASE64_CHAR_TO_VALUE
2038
2039   return q - (char *) dest;
2040 }
2041
2042 #undef IS_ASCII
2043 #undef NEXT_CHAR
2044 \f
2045 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2046    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2047
2048 static void
2049 mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2050                     int (*cmpfun) (const void *, const void *))
2051 {
2052 #define ELT(array, pos) ((char *)(array) + (pos) * size)
2053   if (from < to)
2054     {
2055       size_t i, j, k;
2056       size_t mid = (to + from) / 2;
2057       mergesort_internal (base, temp, size, from, mid, cmpfun);
2058       mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2059       i = from;
2060       j = mid + 1;
2061       for (k = from; (i <= mid) && (j <= to); k++)
2062         if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2063           memcpy (ELT (temp, k), ELT (base, i++), size);
2064         else
2065           memcpy (ELT (temp, k), ELT (base, j++), size);
2066       while (i <= mid)
2067         memcpy (ELT (temp, k++), ELT (base, i++), size);
2068       while (j <= to)
2069         memcpy (ELT (temp, k++), ELT (base, j++), size);
2070       for (k = from; k <= to; k++)
2071         memcpy (ELT (base, k), ELT (temp, k), size);
2072     }
2073 #undef ELT
2074 }
2075
2076 /* Stable sort with interface exactly like standard library's qsort.
2077    Uses mergesort internally, allocating temporary storage with
2078    alloca.  */
2079
2080 void
2081 stable_sort (void *base, size_t nmemb, size_t size,
2082              int (*cmpfun) (const void *, const void *))
2083 {
2084   if (size > 1)
2085     {
2086       void *temp = alloca (nmemb * size * sizeof (void *));
2087       mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2088     }
2089 }
2090 \f
2091 /* Print a decimal number.  If it is equal to or larger than ten, the
2092    number is rounded.  Otherwise it is printed with one significant
2093    digit without trailing zeros and with no more than three fractional
2094    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2095    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2096
2097    This is useful for displaying durations because it provides
2098    order-of-magnitude information without unnecessary clutter --
2099    long-running downloads are shown without the fractional part, and
2100    short ones still retain one significant digit.  */
2101
2102 const char *
2103 print_decimal (double number)
2104 {
2105   static char buf[32];
2106   double n = number >= 0 ? number : -number;
2107
2108   if (n >= 9.95)
2109     /* Cut off at 9.95 because the below %.1f would round 9.96 to
2110        "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2111     snprintf (buf, sizeof buf, "%.0f", number);
2112   else if (n >= 0.95)
2113     snprintf (buf, sizeof buf, "%.1f", number);
2114   else if (n >= 0.001)
2115     snprintf (buf, sizeof buf, "%.1g", number);
2116   else if (n >= 0.0005)
2117     /* round [0.0005, 0.001) to 0.001 */
2118     snprintf (buf, sizeof buf, "%.3f", number);
2119   else
2120     /* print numbers close to 0 as 0, not 0.000 */
2121     strcpy (buf, "0");
2122
2123   return buf;
2124 }
2125
2126 #ifdef TESTING
2127
2128 const char *
2129 test_subdir_p()
2130 {
2131   int i;
2132   struct {
2133     char *d1;
2134     char *d2;
2135     bool result;
2136   } test_array[] = {
2137     { "/somedir", "/somedir", true },
2138     { "/somedir", "/somedir/d2", true },
2139     { "/somedir/d1", "/somedir", false },
2140   };
2141
2142   for (i = 0; i < countof(test_array); ++i)
2143     {
2144       bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2145
2146       mu_assert ("test_subdir_p: wrong result",
2147                  res == test_array[i].result);
2148     }
2149
2150   return NULL;
2151 }
2152
2153 const char *
2154 test_dir_matches_p()
2155 {
2156   int i;
2157   struct {
2158     char *dirlist[3];
2159     char *dir;
2160     bool result;
2161   } test_array[] = {
2162     { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2163     { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2164     { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2165     { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2166     { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2167   };
2168
2169   for (i = 0; i < countof(test_array); ++i)
2170     {
2171       bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2172
2173       mu_assert ("test_dir_matches_p: wrong result",
2174                  res == test_array[i].result);
2175     }
2176
2177   return NULL;
2178 }
2179
2180 #endif /* TESTING */
2181