sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   3    2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 #include "wget.h"
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #include <time.h>
  37 #ifdef HAVE_SYS_TIME_H
  38 # include <sys/time.h>
  39 #endif
  40 #ifdef HAVE_UNISTD_H
  41 # include <unistd.h>
  42 #endif
  43 #ifdef HAVE_MMAP
  44 # include <sys/mman.h>
  45 #endif
  46 #ifdef HAVE_PROCESS_H
  47 # include <process.h>  /* getpid() */
  48 #endif
  49 #ifdef HAVE_UTIME_H
  50 # include <utime.h>
  51 #endif
  52 #ifdef HAVE_SYS_UTIME_H
  53 # include <sys/utime.h>
  54 #endif
  55 #include <errno.h>
  56 #include <fcntl.h>
  57 #include <assert.h>
  58 #include <stdarg.h>
  59 #include <locale.h>
  60
  61 /* For TIOCGWINSZ and friends: */
  62 #ifdef HAVE_SYS_IOCTL_H
  63 # include <sys/ioctl.h>
  64 #endif
  65 #ifdef HAVE_TERMIOS_H
  66 # include <termios.h>
  67 #endif
  68
  69 /* Needed for Unix version of run_with_timeout. */
  70 #include <signal.h>
  71 #include <setjmp.h>
  72
  73 #ifndef HAVE_SIGSETJMP
  74 /* If sigsetjmp is a macro, configure won't pick it up. */
  75 # ifdef sigsetjmp
  76 #  define HAVE_SIGSETJMP
  77 # endif
  78 #endif
  79
  80 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  81 # define USE_SIGNAL_TIMEOUT
  82 #endif
  83
  84 #include "utils.h"
  85 #include "hash.h"
  86
  87 #ifdef TESTING
  88 #include "test.h"
  89 #endif
  90
  91 /* Utility function: like xstrdup(), but also lowercases S.  */
  92
  93 char *
  94 xstrdup_lower (const char *s)
  95 {
  96   char *copy = xstrdup (s);
  97   char *p = copy;
  98   for (; *p; p++)
  99     *p = c_tolower (*p);
 100   return copy;
 101 }
 102
 103 /* Copy the string formed by two pointers (one on the beginning, other
 104    on the char after the last char) to a new, malloc-ed location.
 105    0-terminate it.  */
 106 char *
 107 strdupdelim (const char *beg, const char *end)
 108 {
 109   char *res = xmalloc (end - beg + 1);
 110   memcpy (res, beg, end - beg);
 111   res[end - beg] = '\0';
 112   return res;
 113 }
 114
 115 /* Parse a string containing comma-separated elements, and return a
 116    vector of char pointers with the elements.  Spaces following the
 117    commas are ignored.  */
 118 char **
 119 sepstring (const char *s)
 120 {
 121   char **res;
 122   const char *p;
 123   int i = 0;
 124
 125   if (!s || !*s)
 126     return NULL;
 127   res = NULL;
 128   p = s;
 129   while (*s)
 130     {
 131       if (*s == ',')
 132         {
 133           res = xrealloc (res, (i + 2) * sizeof (char *));
 134           res[i] = strdupdelim (p, s);
 135           res[++i] = NULL;
 136           ++s;
 137           /* Skip the blanks following the ','.  */
 138           while (c_isspace (*s))
 139             ++s;
 140           p = s;
 141         }
 142       else
 143         ++s;
 144     }
 145   res = xrealloc (res, (i + 2) * sizeof (char *));
 146   res[i] = strdupdelim (p, s);
 147   res[i + 1] = NULL;
 148   return res;
 149 }
 150 \f
 151 /* Like sprintf, but prints into a string of sufficient size freshly
 152    allocated with malloc, which is returned.  If unable to print due
 153    to invalid format, returns NULL.  Inability to allocate needed
 154    memory results in abort, as with xmalloc.  This is in spirit
 155    similar to the GNU/BSD extension asprintf, but somewhat easier to
 156    use.
 157
 158    Internally the function either calls vasprintf or loops around
 159    vsnprintf until the correct size is found.  Since Wget also ships a
 160    fallback implementation of vsnprintf, this should be portable.  */
 161
 162 /* Constant is using for limits memory allocation for text buffer.
 163    Applicable in situation when: vasprintf is not available in the system
 164    and vsnprintf return -1 when long line is truncated (in old versions of
 165    glibc and in other system where C99 doesn`t support) */
 166
 167 #define FMT_MAX_LENGTH 1048576
 168
 169 char *
 170 aprintf (const char *fmt, ...)
 171 {
 172 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 173   /* Use vasprintf. */
 174   int ret;
 175   va_list args;
 176   char *str;
 177   va_start (args, fmt);
 178   ret = vasprintf (&str, fmt, args);
 179   va_end (args);
 180   if (ret < 0 && errno == ENOMEM)
 181     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 182                                                       with xmalloc/xrealloc */
 183   else if (ret < 0)
 184     return NULL;
 185   return str;
 186 #else  /* not HAVE_VASPRINTF */
 187
 188   /* vasprintf is unavailable.  snprintf into a small buffer and
 189      resize it as necessary. */
 190   int size = 32;
 191   char *str = xmalloc (size);
 192
 193   /* #### This code will infloop and eventually abort in xrealloc if
 194      passed a FMT that causes snprintf to consistently return -1.  */
 195
 196   while (1)
 197     {
 198       int n;
 199       va_list args;
 200
 201       va_start (args, fmt);
 202       n = vsnprintf (str, size, fmt, args);
 203       va_end (args);
 204
 205       /* If the printing worked, return the string. */
 206       if (n > -1 && n < size)
 207         return str;
 208
 209       /* Else try again with a larger buffer. */
 210       if (n > -1)               /* C99 */
 211         size = n + 1;           /* precisely what is needed */
 212       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 213         {                               /* maybe we have some wrong
 214                                            format string? */
 215           logprintf (LOG_ALWAYS,
 216                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 217                        "aborting.\n"),
 218                      exec_name, size);  /* printout a log message */
 219           abort ();                     /* and abort... */
 220         }
 221       else
 222         {
 223           /* else, we continue to grow our
 224            * buffer: Twice the old size. */
 225           size <<= 1;
 226         }
 227       str = xrealloc (str, size);
 228     }
 229 #endif /* not HAVE_VASPRINTF */
 230 }
 231
 232 /* Concatenate the NULL-terminated list of string arguments into
 233    freshly allocated space.  */
 234
 235 char *
 236 concat_strings (const char *str0, ...)
 237 {
 238   va_list args;
 239   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 240   char *ret, *p;
 241
 242   const char *next_str;
 243   int total_length = 0;
 244   int argcount;
 245
 246   /* Calculate the length of and allocate the resulting string. */
 247
 248   argcount = 0;
 249   va_start (args, str0);
 250   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 251     {
 252       int len = strlen (next_str);
 253       if (argcount < countof (saved_lengths))
 254         saved_lengths[argcount++] = len;
 255       total_length += len;
 256     }
 257   va_end (args);
 258   p = ret = xmalloc (total_length + 1);
 259
 260   /* Copy the strings into the allocated space. */
 261
 262   argcount = 0;
 263   va_start (args, str0);
 264   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 265     {
 266       int len;
 267       if (argcount < countof (saved_lengths))
 268         len = saved_lengths[argcount++];
 269       else
 270         len = strlen (next_str);
 271       memcpy (p, next_str, len);
 272       p += len;
 273     }
 274   va_end (args);
 275   *p = '\0';
 276
 277   return ret;
 278 }
 279 \f
 280 /* Format the provided time according to the specified format.  The
 281    format is a string with format elements supported by strftime.  */
 282
 283 static char *
 284 fmttime (time_t t, const char *fmt)
 285 {
 286   static char output[32];
 287   struct tm *tm = localtime(&t);
 288   if (!tm)
 289     abort ();
 290   if (!strftime(output, sizeof(output), fmt, tm))
 291     abort ();
 292   return output;
 293 }
 294
 295 /* Return pointer to a static char[] buffer in which zero-terminated
 296    string-representation of TM (in form hh:mm:ss) is printed.
 297
 298    If TM is NULL, the current time will be used.  */
 299
 300 char *
 301 time_str (time_t t)
 302 {
 303   return fmttime(t, "%H:%M:%S");
 304 }
 305
 306 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 307
 308 char *
 309 datetime_str (time_t t)
 310 {
 311   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 312 }
 313 \f
 314 /* The Windows versions of the following two functions are defined in
 315    mswindows.c. On MSDOS this function should never be called. */
 316
 317 #if !defined(WINDOWS) && !defined(MSDOS)
 318 void
 319 fork_to_background (void)
 320 {
 321   pid_t pid;
 322   /* Whether we arrange our own version of opt.lfilename here.  */
 323   bool logfile_changed = false;
 324
 325   if (!opt.lfilename)
 326     {
 327       /* We must create the file immediately to avoid either a race
 328          condition (which arises from using unique_name and failing to
 329          use fopen_excl) or lying to the user about the log file name
 330          (which arises from using unique_name, printing the name, and
 331          using fopen_excl later on.)  */
 332       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 333       if (new_log_fp)
 334         {
 335           logfile_changed = true;
 336           fclose (new_log_fp);
 337         }
 338     }
 339   pid = fork ();
 340   if (pid < 0)
 341     {
 342       /* parent, error */
 343       perror ("fork");
 344       exit (1);
 345     }
 346   else if (pid != 0)
 347     {
 348       /* parent, no error */
 349       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 350       if (logfile_changed)
 351         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 352       exit (0);                 /* #### should we use _exit()? */
 353     }
 354
 355   /* child: give up the privileges and keep running. */
 356   setsid ();
 357   freopen ("/dev/null", "r", stdin);
 358   freopen ("/dev/null", "w", stdout);
 359   freopen ("/dev/null", "w", stderr);
 360 }
 361 #endif /* !WINDOWS && !MSDOS */
 362 \f
 363 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 364    specified with TM.  The atime ("access time") is set to the current
 365    time.  */
 366
 367 void
 368 touch (const char *file, time_t tm)
 369 {
 370 #ifdef HAVE_STRUCT_UTIMBUF
 371   struct utimbuf times;
 372 #else
 373   struct {
 374     time_t actime;
 375     time_t modtime;
 376   } times;
 377 #endif
 378   times.modtime = tm;
 379   times.actime = time (NULL);
 380   if (utime (file, &times) == -1)
 381     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 382 }
 383
 384 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 385    nothing under MS-Windows.  */
 386 int
 387 remove_link (const char *file)
 388 {
 389   int err = 0;
 390   struct_stat st;
 391
 392   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 393     {
 394       DEBUGP (("Unlinking %s (symlink).\n", file));
 395       err = unlink (file);
 396       if (err != 0)
 397         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 398                    file, strerror (errno));
 399     }
 400   return err;
 401 }
 402
 403 /* Does FILENAME exist?  This is quite a lousy implementation, since
 404    it supplies no error codes -- only a yes-or-no answer.  Thus it
 405    will return that a file does not exist if, e.g., the directory is
 406    unreadable.  I don't mind it too much currently, though.  The
 407    proper way should, of course, be to have a third, error state,
 408    other than true/false, but that would introduce uncalled-for
 409    additional complexity to the callers.  */
 410 bool
 411 file_exists_p (const char *filename)
 412 {
 413 #ifdef HAVE_ACCESS
 414   return access (filename, F_OK) >= 0;
 415 #else
 416   struct_stat buf;
 417   return stat (filename, &buf) >= 0;
 418 #endif
 419 }
 420
 421 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 422    Returns 0 on error.  */
 423 bool
 424 file_non_directory_p (const char *path)
 425 {
 426   struct_stat buf;
 427   /* Use lstat() rather than stat() so that symbolic links pointing to
 428      directories can be identified correctly.  */
 429   if (lstat (path, &buf) != 0)
 430     return false;
 431   return S_ISDIR (buf.st_mode) ? false : true;
 432 }
 433
 434 /* Return the size of file named by FILENAME, or -1 if it cannot be
 435    opened or seeked into. */
 436 wgint
 437 file_size (const char *filename)
 438 {
 439 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 440   wgint size;
 441   /* We use fseek rather than stat to determine the file size because
 442      that way we can also verify that the file is readable without
 443      explicitly checking for permissions.  Inspired by the POST patch
 444      by Arnaud Wylie.  */
 445   FILE *fp = fopen (filename, "rb");
 446   if (!fp)
 447     return -1;
 448   fseeko (fp, 0, SEEK_END);
 449   size = ftello (fp);
 450   fclose (fp);
 451   return size;
 452 #else
 453   struct_stat st;
 454   if (stat (filename, &st) < 0)
 455     return -1;
 456   return st.st_size;
 457 #endif
 458 }
 459
 460 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 461    doesn't exist is found.  Return a freshly allocated copy of the
 462    unused file name.  */
 463
 464 static char *
 465 unique_name_1 (const char *prefix)
 466 {
 467   int count = 1;
 468   int plen = strlen (prefix);
 469   char *template = (char *)alloca (plen + 1 + 24);
 470   char *template_tail = template + plen;
 471
 472   memcpy (template, prefix, plen);
 473   *template_tail++ = '.';
 474
 475   do
 476     number_to_string (template_tail, count++);
 477   while (file_exists_p (template));
 478
 479   return xstrdup (template);
 480 }
 481
 482 /* Return a unique file name, based on FILE.
 483
 484    More precisely, if FILE doesn't exist, it is returned unmodified.
 485    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 486    file name that doesn't exist is returned.
 487
 488    The resulting file is not created, only verified that it didn't
 489    exist at the point in time when the function was called.
 490    Therefore, where security matters, don't rely that the file created
 491    by this function exists until you open it with O_EXCL or
 492    equivalent.
 493
 494    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 495    string.  Otherwise, it may return FILE if the file doesn't exist
 496    (and therefore doesn't need changing).  */
 497
 498 char *
 499 unique_name (const char *file, bool allow_passthrough)
 500 {
 501   /* If the FILE itself doesn't exist, return it without
 502      modification. */
 503   if (!file_exists_p (file))
 504     return allow_passthrough ? (char *)file : xstrdup (file);
 505
 506   /* Otherwise, find a numeric suffix that results in unused file name
 507      and return it.  */
 508   return unique_name_1 (file);
 509 }
 510
 511 /* Create a file based on NAME, except without overwriting an existing
 512    file with that name.  Providing O_EXCL is correctly implemented,
 513    this function does not have the race condition associated with
 514    opening the file returned by unique_name.  */
 515
 516 FILE *
 517 unique_create (const char *name, bool binary, char **opened_name)
 518 {
 519   /* unique file name, based on NAME */
 520   char *uname = unique_name (name, false);
 521   FILE *fp;
 522   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 523     {
 524       xfree (uname);
 525       uname = unique_name (name, false);
 526     }
 527   if (opened_name && fp != NULL)
 528     {
 529       if (fp)
 530         *opened_name = uname;
 531       else
 532         {
 533           *opened_name = NULL;
 534           xfree (uname);
 535         }
 536     }
 537   else
 538     xfree (uname);
 539   return fp;
 540 }
 541
 542 /* Open the file for writing, with the addition that the file is
 543    opened "exclusively".  This means that, if the file already exists,
 544    this function will *fail* and errno will be set to EEXIST.  If
 545    BINARY is set, the file will be opened in binary mode, equivalent
 546    to fopen's "wb".
 547
 548    If opening the file fails for any reason, including the file having
 549    previously existed, this function returns NULL and sets errno
 550    appropriately.  */
 551
 552 FILE *
 553 fopen_excl (const char *fname, bool binary)
 554 {
 555   int fd;
 556 #ifdef O_EXCL
 557   int flags = O_WRONLY | O_CREAT | O_EXCL;
 558 # ifdef O_BINARY
 559   if (binary)
 560     flags |= O_BINARY;
 561 # endif
 562   fd = open (fname, flags, 0666);
 563   if (fd < 0)
 564     return NULL;
 565   return fdopen (fd, binary ? "wb" : "w");
 566 #else  /* not O_EXCL */
 567   /* Manually check whether the file exists.  This is prone to race
 568      conditions, but systems without O_EXCL haven't deserved
 569      better.  */
 570   if (file_exists_p (fname))
 571     {
 572       errno = EEXIST;
 573       return NULL;
 574     }
 575   return fopen (fname, binary ? "wb" : "w");
 576 #endif /* not O_EXCL */
 577 }
 578 \f
 579 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 580    are missing, create them first.  In case any mkdir() call fails,
 581    return its error status.  Returns 0 on successful completion.
 582
 583    The behaviour of this function should be identical to the behaviour
 584    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 585 int
 586 make_directory (const char *directory)
 587 {
 588   int i, ret, quit = 0;
 589   char *dir;
 590
 591   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 592      function is unsafe if called with a read-only char *argument.  */
 593   STRDUP_ALLOCA (dir, directory);
 594
 595   /* If the first character of dir is '/', skip it (and thus enable
 596      creation of absolute-pathname directories.  */
 597   for (i = (*dir == '/'); 1; ++i)
 598     {
 599       for (; dir[i] && dir[i] != '/'; i++)
 600         ;
 601       if (!dir[i])
 602         quit = 1;
 603       dir[i] = '\0';
 604       /* Check whether the directory already exists.  Allow creation of
 605          of intermediate directories to fail, as the initial path components
 606          are not necessarily directories!  */
 607       if (!file_exists_p (dir))
 608         ret = mkdir (dir, 0777);
 609       else
 610         ret = 0;
 611       if (quit)
 612         break;
 613       else
 614         dir[i] = '/';
 615     }
 616   return ret;
 617 }
 618
 619 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 620    should be a file name.
 621
 622    file_merge("/foo/bar", "baz")  => "/foo/baz"
 623    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 624    file_merge("foo", "bar")       => "bar"
 625
 626    In other words, it's a simpler and gentler version of uri_merge.  */
 627
 628 char *
 629 file_merge (const char *base, const char *file)
 630 {
 631   char *result;
 632   const char *cut = (const char *)strrchr (base, '/');
 633
 634   if (!cut)
 635     return xstrdup (file);
 636
 637   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 638   memcpy (result, base, cut - base);
 639   result[cut - base] = '/';
 640   strcpy (result + (cut - base) + 1, file);
 641
 642   return result;
 643 }
 644 \f
 645 /* Like fnmatch, but performs a case-insensitive match.  */
 646
 647 int
 648 fnmatch_nocase (const char *pattern, const char *string, int flags)
 649 {
 650 #ifdef FNM_CASEFOLD
 651   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 652      also present on *BSD platforms, and possibly elsewhere.  */
 653   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 654 #else
 655   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 656   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 657   char *strcopy = (char *) alloca (strlen (string) + 1);
 658   char *p;
 659   for (p = patcopy; *pattern; pattern++, p++)
 660     *p = c_tolower (*pattern);
 661   *p = '\0';
 662   for (p = strcopy; *string; string++, p++)
 663     *p = c_tolower (*string);
 664   *p = '\0';
 665   return fnmatch (patcopy, strcopy, flags);
 666 #endif
 667 }
 668
 669 static bool in_acclist (const char *const *, const char *, bool);
 670
 671 /* Determine whether a file is acceptable to be followed, according to
 672    lists of patterns to accept/reject.  */
 673 bool
 674 acceptable (const char *s)
 675 {
 676   int l = strlen (s);
 677
 678   while (l && s[l] != '/')
 679     --l;
 680   if (s[l] == '/')
 681     s += (l + 1);
 682   if (opt.accepts)
 683     {
 684       if (opt.rejects)
 685         return (in_acclist ((const char *const *)opt.accepts, s, true)
 686                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 687       else
 688         return in_acclist ((const char *const *)opt.accepts, s, true);
 689     }
 690   else if (opt.rejects)
 691     return !in_acclist ((const char *const *)opt.rejects, s, true);
 692   return true;
 693 }
 694
 695 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 696    will return true if and only if D2 begins with `/something/' or is exactly
 697    '/something'.  */
 698 bool
 699 subdir_p (const char *d1, const char *d2)
 700 {
 701   if (*d1 == '\0')
 702     return true;
 703   if (!opt.ignore_case)
 704     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 705       ;
 706   else
 707     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 708       ;
 709
 710   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 711 }
 712
 713 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 714    first element that matches DIR, through wildcards or front comparison (as
 715    appropriate).  */
 716 static bool
 717 dir_matches_p (char **dirlist, const char *dir)
 718 {
 719   char **x;
 720   int (*matcher) (const char *, const char *, int)
 721     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 722
 723   for (x = dirlist; *x; x++)
 724     {
 725       /* Remove leading '/' */
 726       char *p = *x + (**x == '/');
 727       if (has_wildcards_p (p))
 728         {
 729           if (matcher (p, dir, FNM_PATHNAME) == 0)
 730             break;
 731         }
 732       else
 733         {
 734           if (subdir_p (p, dir))
 735             break;
 736         }
 737     }
 738
 739   return *x ? true : false;
 740 }
 741
 742 /* Returns whether DIRECTORY is acceptable for download, wrt the
 743    include/exclude lists.
 744
 745    The leading `/' is ignored in paths; relative and absolute paths
 746    may be freely intermixed.  */
 747
 748 bool
 749 accdir (const char *directory)
 750 {
 751   /* Remove starting '/'.  */
 752   if (*directory == '/')
 753     ++directory;
 754   if (opt.includes)
 755     {
 756       if (!dir_matches_p (opt.includes, directory))
 757         return false;
 758     }
 759   if (opt.excludes)
 760     {
 761       if (dir_matches_p (opt.excludes, directory))
 762         return false;
 763     }
 764   return true;
 765 }
 766
 767 /* Return true if STRING ends with TAIL.  For instance:
 768
 769    match_tail ("abc", "bc", false)  -> 1
 770    match_tail ("abc", "ab", false)  -> 0
 771    match_tail ("abc", "abc", false) -> 1
 772
 773    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 774
 775 bool
 776 match_tail (const char *string, const char *tail, bool fold_case)
 777 {
 778   int i, j;
 779
 780   /* We want this to be fast, so we code two loops, one with
 781      case-folding, one without. */
 782
 783   if (!fold_case)
 784     {
 785       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 786         if (string[i] != tail[j])
 787           break;
 788     }
 789   else
 790     {
 791       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 792         if (c_tolower (string[i]) != c_tolower (tail[j]))
 793           break;
 794     }
 795
 796   /* If the tail was exhausted, the match was succesful.  */
 797   if (j == -1)
 798     return true;
 799   else
 800     return false;
 801 }
 802
 803 /* Checks whether string S matches each element of ACCEPTS.  A list
 804    element are matched either with fnmatch() or match_tail(),
 805    according to whether the element contains wildcards or not.
 806
 807    If the BACKWARD is false, don't do backward comparison -- just compare
 808    them normally.  */
 809 static bool
 810 in_acclist (const char *const *accepts, const char *s, bool backward)
 811 {
 812   for (; *accepts; accepts++)
 813     {
 814       if (has_wildcards_p (*accepts))
 815         {
 816           int res = opt.ignore_case
 817             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 818           /* fnmatch returns 0 if the pattern *does* match the string.  */
 819           if (res == 0)
 820             return true;
 821         }
 822       else
 823         {
 824           if (backward)
 825             {
 826               if (match_tail (s, *accepts, opt.ignore_case))
 827                 return true;
 828             }
 829           else
 830             {
 831               int cmp = opt.ignore_case
 832                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 833               if (cmp == 0)
 834                 return true;
 835             }
 836         }
 837     }
 838   return false;
 839 }
 840
 841 /* Return the location of STR's suffix (file extension).  Examples:
 842    suffix ("foo.bar")       -> "bar"
 843    suffix ("foo.bar.baz")   -> "baz"
 844    suffix ("/foo/bar")      -> NULL
 845    suffix ("/foo.bar/baz")  -> NULL  */
 846 char *
 847 suffix (const char *str)
 848 {
 849   int i;
 850
 851   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 852     ;
 853
 854   if (str[i++] == '.')
 855     return (char *)str + i;
 856   else
 857     return NULL;
 858 }
 859
 860 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 861    `]').  */
 862
 863 bool
 864 has_wildcards_p (const char *s)
 865 {
 866   for (; *s; s++)
 867     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 868       return true;
 869   return false;
 870 }
 871
 872 /* Return true if FNAME ends with a typical HTML suffix.  The
 873    following (case-insensitive) suffixes are presumed to be HTML
 874    files:
 875
 876      html
 877      htm
 878      ?html (`?' matches one character)
 879
 880    #### CAVEAT.  This is not necessarily a good indication that FNAME
 881    refers to a file that contains HTML!  */
 882 bool
 883 has_html_suffix_p (const char *fname)
 884 {
 885   char *suf;
 886
 887   if ((suf = suffix (fname)) == NULL)
 888     return false;
 889   if (!strcasecmp (suf, "html"))
 890     return true;
 891   if (!strcasecmp (suf, "htm"))
 892     return true;
 893   if (suf[0] && !strcasecmp (suf + 1, "html"))
 894     return true;
 895   return false;
 896 }
 897
 898 /* Read a line from FP and return the pointer to freshly allocated
 899    storage.  The storage space is obtained through malloc() and should
 900    be freed with free() when it is no longer needed.
 901
 902    The length of the line is not limited, except by available memory.
 903    The newline character at the end of line is retained.  The line is
 904    terminated with a zero character.
 905
 906    After end-of-file is encountered without anything being read, NULL
 907    is returned.  NULL is also returned on error.  To distinguish
 908    between these two cases, use the stdio function ferror().  */
 909
 910 char *
 911 read_whole_line (FILE *fp)
 912 {
 913   int length = 0;
 914   int bufsize = 82;
 915   char *line = xmalloc (bufsize);
 916
 917   while (fgets (line + length, bufsize - length, fp))
 918     {
 919       length += strlen (line + length);
 920       if (length == 0)
 921         /* Possible for example when reading from a binary file where
 922            a line begins with \0.  */
 923         continue;
 924
 925       if (line[length - 1] == '\n')
 926         break;
 927
 928       /* fgets() guarantees to read the whole line, or to use up the
 929          space we've given it.  We can double the buffer
 930          unconditionally.  */
 931       bufsize <<= 1;
 932       line = xrealloc (line, bufsize);
 933     }
 934   if (length == 0 || ferror (fp))
 935     {
 936       xfree (line);
 937       return NULL;
 938     }
 939   if (length + 1 < bufsize)
 940     /* Relieve the memory from our exponential greediness.  We say
 941        `length + 1' because the terminating \0 is not included in
 942        LENGTH.  We don't need to zero-terminate the string ourselves,
 943        though, because fgets() does that.  */
 944     line = xrealloc (line, length + 1);
 945   return line;
 946 }
 947 \f
 948 /* Read FILE into memory.  A pointer to `struct file_memory' are
 949    returned; use struct element `content' to access file contents, and
 950    the element `length' to know the file length.  `content' is *not*
 951    zero-terminated, and you should *not* read or write beyond the [0,
 952    length) range of characters.
 953
 954    After you are done with the file contents, call read_file_free to
 955    release the memory.
 956
 957    Depending on the operating system and the type of file that is
 958    being read, read_file() either mmap's the file into memory, or
 959    reads the file into the core using read().
 960
 961    If file is named "-", fileno(stdin) is used for reading instead.
 962    If you want to read from a real file named "-", use "./-" instead.  */
 963
 964 struct file_memory *
 965 read_file (const char *file)
 966 {
 967   int fd;
 968   struct file_memory *fm;
 969   long size;
 970   bool inhibit_close = false;
 971
 972   /* Some magic in the finest tradition of Perl and its kin: if FILE
 973      is "-", just use stdin.  */
 974   if (HYPHENP (file))
 975     {
 976       fd = fileno (stdin);
 977       inhibit_close = true;
 978       /* Note that we don't inhibit mmap() in this case.  If stdin is
 979          redirected from a regular file, mmap() will still work.  */
 980     }
 981   else
 982     fd = open (file, O_RDONLY);
 983   if (fd < 0)
 984     return NULL;
 985   fm = xnew (struct file_memory);
 986
 987 #ifdef HAVE_MMAP
 988   {
 989     struct_fstat buf;
 990     if (fstat (fd, &buf) < 0)
 991       goto mmap_lose;
 992     fm->length = buf.st_size;
 993     /* NOTE: As far as I know, the callers of this function never
 994        modify the file text.  Relying on this would enable us to
 995        specify PROT_READ and MAP_SHARED for a marginal gain in
 996        efficiency, but at some cost to generality.  */
 997     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 998                         MAP_PRIVATE, fd, 0);
 999     if (fm->content == (char *)MAP_FAILED)
1000       goto mmap_lose;
1001     if (!inhibit_close)
1002       close (fd);
1003
1004     fm->mmap_p = 1;
1005     return fm;
1006   }
1007
1008  mmap_lose:
1009   /* The most common reason why mmap() fails is that FD does not point
1010      to a plain file.  However, it's also possible that mmap() doesn't
1011      work for a particular type of file.  Therefore, whenever mmap()
1012      fails, we just fall back to the regular method.  */
1013 #endif /* HAVE_MMAP */
1014
1015   fm->length = 0;
1016   size = 512;                   /* number of bytes fm->contents can
1017                                    hold at any given time. */
1018   fm->content = xmalloc (size);
1019   while (1)
1020     {
1021       wgint nread;
1022       if (fm->length > size / 2)
1023         {
1024           /* #### I'm not sure whether the whole exponential-growth
1025              thing makes sense with kernel read.  On Linux at least,
1026              read() refuses to read more than 4K from a file at a
1027              single chunk anyway.  But other Unixes might optimize it
1028              better, and it doesn't *hurt* anything, so I'm leaving
1029              it.  */
1030
1031           /* Normally, we grow SIZE exponentially to make the number
1032              of calls to read() and realloc() logarithmic in relation
1033              to file size.  However, read() can read an amount of data
1034              smaller than requested, and it would be unreasonable to
1035              double SIZE every time *something* was read.  Therefore,
1036              we double SIZE only when the length exceeds half of the
1037              entire allocated size.  */
1038           size <<= 1;
1039           fm->content = xrealloc (fm->content, size);
1040         }
1041       nread = read (fd, fm->content + fm->length, size - fm->length);
1042       if (nread > 0)
1043         /* Successful read. */
1044         fm->length += nread;
1045       else if (nread < 0)
1046         /* Error. */
1047         goto lose;
1048       else
1049         /* EOF */
1050         break;
1051     }
1052   if (!inhibit_close)
1053     close (fd);
1054   if (size > fm->length && fm->length != 0)
1055     /* Due to exponential growth of fm->content, the allocated region
1056        might be much larger than what is actually needed.  */
1057     fm->content = xrealloc (fm->content, fm->length);
1058   fm->mmap_p = 0;
1059   return fm;
1060
1061  lose:
1062   if (!inhibit_close)
1063     close (fd);
1064   xfree (fm->content);
1065   xfree (fm);
1066   return NULL;
1067 }
1068
1069 /* Release the resources held by FM.  Specifically, this calls
1070    munmap() or xfree() on fm->content, depending whether mmap or
1071    malloc/read were used to read in the file.  It also frees the
1072    memory needed to hold the FM structure itself.  */
1073
1074 void
1075 read_file_free (struct file_memory *fm)
1076 {
1077 #ifdef HAVE_MMAP
1078   if (fm->mmap_p)
1079     {
1080       munmap (fm->content, fm->length);
1081     }
1082   else
1083 #endif
1084     {
1085       xfree (fm->content);
1086     }
1087   xfree (fm);
1088 }
1089 \f
/* Release a NULL-terminated vector of pointers: every element is
   passed to xfree, and then the vector itself.  A NULL VEC is
   accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;
  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1103
/* Append vector V2 to vector V1.  Both are NULL-terminated vectors
   of string pointers.  V1 is reallocated and the V2 array is freed
   (its element pointers are copied into the result), so neither
   pointer may be used after the call; use the return value instead.
   If V1 is NULL, V2 is returned; if V2 is NULL or empty, V1 is
   returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The size is derived from the element type (`char *')
     rather than from the type of the vector pointer.  */
  v1 = xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy v2's elements, including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1134
/* Add a newly allocated copy of STR to the end of the
   NULL-terminated vector VEC and return the (possibly moved) vector.
   VEC may be NULL, in which case a new vector is created.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* number of strings already in VEC */

  if (vec)
    while (vec[used])
      ++used;

  /* Room for the existing strings, the new one, and the NULL.  */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));
  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1158 \f
1159 /* Sometimes it's useful to create "sets" of strings, i.e. special
1160    hash tables where you want to store strings as keys and merely
1161    query for their existence.  Here is a set of utility routines that
1162    makes that transparent.  */
1163
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not a member yet; that way an existing key
     never has to be freed and duplicated again.

     The value stored is the literal "1": an arbitrary, recognizable
     placeholder that costs no extra memory, because every entry
     points to the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1179
/* Report whether S is a member of the string set HT.  This simply
   forwards to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1187
1188 /* Convert the specified string set to array.  ARRAY should be large
1189    enough to hold hash_table_count(ht) char pointers.  */
1190
1191 void string_set_to_array (struct hash_table *ht, char **array)
1192 {
1193   hash_table_iterator iter;
1194   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1195     *array++ = iter.key;
1196 }
1197
1198 /* Free the string set.  This frees both the storage allocated for
1199    keys and the actual hash table.  (hash_table_destroy would only
1200    destroy the hash table.)  */
1201
1202 void
1203 string_set_free (struct hash_table *ht)
1204 {
1205   hash_table_iterator iter;
1206   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1207     xfree (iter.key);
1208   hash_table_destroy (ht);
1209 }
1210
1211 /* Utility function: simply call xfree() on all keys and values of HT.  */
1212
1213 void
1214 free_keys_and_values (struct hash_table *ht)
1215 {
1216   hash_table_iterator iter;
1217   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1218     {
1219       xfree (iter.key);
1220       xfree (iter.value);
1221     }
1222 }
1223 \f
/* Obtain the digit grouping data used for thousand separators.  The
   separator string is stored in *SEP and the grouping info in
   *GROUPING.  The values come from localeconv() and are computed on
   the first call only; later calls return the cached pointers.

   When the locale does not provide a usable separator (as in the "C"
   locale), one is forced: "," normally, or "." if the locale's
   decimal point is a comma, with digits grouped by three.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static bool have_data;
  static const char *saved_sep;
  static const char *saved_grouping;

  if (!have_data)
    {
      const struct lconv *lc = localeconv ();
      const char *locale_sep = lc->thousands_sep;
      const char *locale_grouping = lc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths cannot be counted in this configuration, so a
         separator longer than one byte is discarded; the fallback
         below then chooses a single-byte one.  */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif

      if (*locale_sep == '\0')
        {
          /* No separator available, but grouping is still wanted for
             legibility.  Pick whichever of ',' and '.' is not
             already serving as the decimal point.  */
          locale_sep = (*lc->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      saved_sep = locale_sep;
      saved_grouping = locale_grouping;
      have_data = true;
    }

  *sep = saved_sep;
  *grouping = saved_grouping;
}
1269
1270 /* Return a printed representation of N with thousand separators.
1271    This should respect locale settings, with the exception of the "C"
1272    locale which mandates no separator, but we use one anyway.
1273
1274    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1275    the separators because it's too non-portable, and it's hard to test
1276    for this feature at configure time.  Besides, it wouldn't display
1277    separators in the "C" locale, still used by many Unix users.  */
1278
1279 const char *
1280 with_thousand_seps (wgint n)
1281 {
1282   static char outbuf[48];
1283   char *p = outbuf + sizeof outbuf;
1284
1285   /* Info received from locale */
1286   const char *grouping, *sep;
1287   int seplen;
1288
1289   /* State information */
1290   int i = 0, groupsize;
1291   const char *atgroup;
1292
1293   bool negative = n < 0;
1294
1295   /* Initialize grouping data. */
1296   get_grouping_data (&sep, &grouping);
1297   seplen = strlen (sep);
1298   atgroup = grouping;
1299   groupsize = *atgroup++;
1300
1301   /* This would overflow on WGINT_MIN, but printing negative numbers
1302      is not an important goal of this fuinction.  */
1303   if (negative)
1304     n = -n;
1305
1306   /* Write the number into the buffer, backwards, inserting the
1307      separators as necessary.  */
1308   *--p = '\0';
1309   while (1)
1310     {
1311       *--p = n % 10 + '0';
1312       n /= 10;
1313       if (n == 0)
1314         break;
1315       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1316       if (++i == groupsize)
1317         {
1318           if (seplen == 1)
1319             *--p = *sep;
1320           else
1321             memcpy (p -= seplen, sep, seplen);
1322           i = 0;
1323           if (*atgroup)
1324             groupsize = *atgroup++;
1325         }
1326     }
1327   if (negative)
1328     *--p = '-';
1329
1330   return p;
1331 }
1332
1333 /* N, a byte quantity, is converted to a human-readable abberviated
1334    form a la sizes printed by `ls -lh'.  The result is written to a
1335    static buffer, a pointer to which is returned.
1336
1337    Unlike `with_thousand_seps', this approximates to the nearest unit.
1338    Quoting GNU libit: "Most people visually process strings of 3-4
1339    digits effectively, but longer strings of digits are more prone to
1340    misinterpretation.  Hence, converting to an abbreviated form
1341    usually improves readability."
1342
1343    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1344    original computer-related meaning of "powers of 1024".  We don't
1345    use the "*bibyte" names invented in 1998, and seldom used in
1346    practice.  Wikipedia's entry on "binary prefix" discusses this in
1347    some detail.  */
1348
1349 char *
1350 human_readable (HR_NUMTYPE n)
1351 {
1352   /* These suffixes are compatible with those of GNU `ls -lh'. */
1353   static char powers[] =
1354     {
1355       'K',                      /* kilobyte, 2^10 bytes */
1356       'M',                      /* megabyte, 2^20 bytes */
1357       'G',                      /* gigabyte, 2^30 bytes */
1358       'T',                      /* terabyte, 2^40 bytes */
1359       'P',                      /* petabyte, 2^50 bytes */
1360       'E',                      /* exabyte,  2^60 bytes */
1361     };
1362   static char buf[8];
1363   int i;
1364
1365   /* If the quantity is smaller than 1K, just print it. */
1366   if (n < 1024)
1367     {
1368       snprintf (buf, sizeof (buf), "%d", (int) n);
1369       return buf;
1370     }
1371
1372   /* Loop over powers, dividing N with 1024 in each iteration.  This
1373      works unchanged for all sizes of wgint, while still avoiding
1374      non-portable `long double' arithmetic.  */
1375   for (i = 0; i < countof (powers); i++)
1376     {
1377       /* At each iteration N is greater than the *subsequent* power.
1378          That way N/1024.0 produces a decimal number in the units of
1379          *this* power.  */
1380       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1381         {
1382           double val = n / 1024.0;
1383           /* Print values smaller than 10 with one decimal digits, and
1384              others without any decimals.  */
1385           snprintf (buf, sizeof (buf), "%.*f%c",
1386                     val < 10 ? 1 : 0, val, powers[i]);
1387           return buf;
1388         }
1389       n /= 1024;
1390     }
1391   return NULL;                  /* unreached */
1392 }
1393
1394 /* Count the digits in the provided number.  Used to allocate space
1395    when printing numbers.  */
1396
1397 int
1398 numdigit (wgint number)
1399 {
1400   int cnt = 1;
1401   if (number < 0)
1402     ++cnt;                      /* accomodate '-' */
1403   while ((number /= 10) != 0)
1404     ++cnt;
1405   return cnt;
1406 }
1407
1408 #define PR(mask) *p++ = n / (mask) + '0'
1409
1410 /* DIGITS_<D> is used to print a D-digit number and should be called
1411    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1412    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1413    Recursively this continues until DIGITS_1 is invoked.  */
1414
1415 #define DIGITS_1(mask) PR (mask)
1416 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1417 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1418 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1419 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1420 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1421 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1422 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1423 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1424 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1425
1426 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1427
1428 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1429 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1430 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1431 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1432 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1433 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1434 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1435 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1436 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1437
1438 /* Shorthand for casting to wgint. */
1439 #define W wgint
1440
1441 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1442    `sprintf(buffer, "%lld", (long long) number)', only typically much
1443    faster and portable to machines without long long.
1444
1445    The speedup may make a difference in programs that frequently
1446    convert numbers to strings.  Some implementations of sprintf,
1447    particularly the one in some versions of GNU libc, have been known
1448    to be quite slow when converting integers to strings.
1449
1450    Return the pointer to the location where the terminating zero was
1451    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1452    function is done.)
1453
1454    BUFFER should be large enough to accept as many bytes as you expect
1455    the number to take up.  On machines with 64-bit wgints the maximum
1456    needed size is 24 bytes.  That includes the digits needed for the
1457    largest 64-bit number, the `-' sign in case it's negative, and the
1458    terminating '\0'.  */
1459
1460 char *
1461 number_to_string (char *buffer, wgint number)
1462 {
1463   char *p = buffer;
1464   wgint n = number;
1465
1466   int last_digit_char = 0;
1467
1468 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1469   /* We are running in a very strange environment.  Leave the correct
1470      printing to sprintf.  */
1471   p += sprintf (buf, "%j", (intmax_t) (n));
1472 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1473
1474   if (n < 0)
1475     {
1476       if (n < -WGINT_MAX)
1477         {
1478           /* n = -n would overflow because -n would evaluate to a
1479              wgint value larger than WGINT_MAX.  Need to make n
1480              smaller and handle the last digit separately.  */
1481           int last_digit = n % 10;
1482           /* The sign of n%10 is implementation-defined. */
1483           if (last_digit < 0)
1484             last_digit_char = '0' - last_digit;
1485           else
1486             last_digit_char = '0' + last_digit;
1487           /* After n is made smaller, -n will not overflow. */
1488           n /= 10;
1489         }
1490
1491       *p++ = '-';
1492       n = -n;
1493     }
1494
1495   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1496      way printing any N is fully open-coded without a loop or jump.
1497      (Also see description of DIGITS_*.)  */
1498
1499   if      (n < 10)                       DIGITS_1 (1);
1500   else if (n < 100)                      DIGITS_2 (10);
1501   else if (n < 1000)                     DIGITS_3 (100);
1502   else if (n < 10000)                    DIGITS_4 (1000);
1503   else if (n < 100000)                   DIGITS_5 (10000);
1504   else if (n < 1000000)                  DIGITS_6 (100000);
1505   else if (n < 10000000)                 DIGITS_7 (1000000);
1506   else if (n < 100000000)                DIGITS_8 (10000000);
1507   else if (n < 1000000000)               DIGITS_9 (100000000);
1508 #if SIZEOF_WGINT == 4
1509   /* wgint is 32 bits wide: no number has more than 10 digits. */
1510   else                                   DIGITS_10 (1000000000);
1511 #else
1512   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1513      Constants are constructed by compile-time multiplication to avoid
1514      dealing with different notations for 64-bit constants
1515      (nL/nLL/nI64, depending on the compiler and architecture).  */
1516   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1517   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1518   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1519   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1520   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1521   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1522   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1523   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1524   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1525   else                                   DIGITS_19 (1000000000*(W)1000000000);
1526 #endif
1527
1528   if (last_digit_char)
1529     *p++ = last_digit_char;
1530
1531   *p = '\0';
1532 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1533
1534   return p;
1535 }
1536
1537 #undef PR
1538 #undef W
1539 #undef SPRINTF_WGINT
1540 #undef DIGITS_1
1541 #undef DIGITS_2
1542 #undef DIGITS_3
1543 #undef DIGITS_4
1544 #undef DIGITS_5
1545 #undef DIGITS_6
1546 #undef DIGITS_7
1547 #undef DIGITS_8
1548 #undef DIGITS_9
1549 #undef DIGITS_10
1550 #undef DIGITS_11
1551 #undef DIGITS_12
1552 #undef DIGITS_13
1553 #undef DIGITS_14
1554 #undef DIGITS_15
1555 #undef DIGITS_16
1556 #undef DIGITS_17
1557 #undef DIGITS_18
1558 #undef DIGITS_19
1559
1560 #define RING_SIZE 3
1561
1562 /* Print NUMBER to a statically allocated string and return a pointer
1563    to the printed representation.
1564
1565    This function is intended to be used in conjunction with printf.
1566    It is hard to portably print wgint values:
1567     a) you cannot use printf("%ld", number) because wgint can be long
1568        long on 32-bit machines with LFS.
1569     b) you cannot use printf("%lld", number) because NUMBER could be
1570        long on 32-bit machines without LFS, or on 64-bit machines,
1571        which do not require LFS.  Also, Windows doesn't support %lld.
1572     c) you cannot use printf("%j", (int_max_t) number) because not all
1573        versions of printf support "%j", the most notable being the one
1574        on Windows.
1575     d) you cannot #define WGINT_FMT to the appropriate format and use
1576        printf(WGINT_FMT, number) because that would break translations
1577        for user-visible messages, such as printf("Downloaded: %d
1578        bytes\n", number).
1579
1580    What you should use instead is printf("%s", number_to_static_string
1581    (number)).
1582
1583    CAVEAT: since the function returns pointers to static data, you
1584    must be careful to copy its result before calling it again.
1585    However, to make it more useful with printf, the function maintains
1586    an internal ring of static buffers to return.  That way things like
1587    printf("%s %s", number_to_static_string (num1),
1588    number_to_static_string (num2)) work as expected.  Three buffers
1589    are currently used, which means that "%s %s %s" will work, but "%s
1590    %s %s %s" won't.  If you need to print more than three wgints,
1591    bump the RING_SIZE (or rethink your message.)  */
1592
1593 char *
1594 number_to_static_string (wgint number)
1595 {
1596   static char ring[RING_SIZE][24];
1597   static int ringpos;
1598   char *buf = ring[ringpos];
1599   number_to_string (buf, number);
1600   ringpos = (ringpos + 1) % RING_SIZE;
1601   return buf;
1602 }
1603 \f
/* Return the width, in columns, of the terminal attached to stderr.
   Return 0 when the width cannot be determined.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize size;

  /* No width is reported when opt.lfilename is set.  */
  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &size) < 0)
    return 0;                   /* most likely ENOTTY */

  return size.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO info;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &info))
    return info.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1633 \f
/* Non-zero once the random number generator in use (rand or the
   [dl]rand48 family) has been seeded.  */
static int rnd_seeded;

/* Return a random number in the range [0, MAX-1].

   lrand48 is used where available and rand elsewhere.  In the rand
   case, a MAX greater than RAND_MAX+1 cannot be honoured: the result
   will then be in the range [0, RAND_MAX].  This may be fixed in a
   future release.

   The generator is seeded automatically on first use, from the
   current time and the process id.

   DO NOT use this for cryptography.  It is only meant for situations
   where the quality of the random numbers doesn't really matter.  */

int
random_number (int max)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */

  if (rnd_seeded == 0)
    {
      unsigned seed = (unsigned) time (NULL) ^ (unsigned) getpid ();
      srand (seed);
      rnd_seeded = 1;
    }

  /* Scale rand() into [0, MAX) instead of taking rand() % max.  This
     uses the high-order bits, which are more random on architectures
     where rand() is a simple congruential generator.  */
  return (int) ((double) max * rand () / (RAND_MAX + 1.0));

#endif /* not HAVE_DRAND48 */
}
1680
/* Return a random floating point number in the [0, 1) range.  With
   drand48 available its result is returned directly; otherwise the
   value is pieced together from four calls to random_number, which
   is a really lame kludge.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Each round contributes four more decimal digits: the divisor
     grows from 10^4 to 10^16 in steps of 10^4.  */
  double result = 0.0;
  double scale = 1.0;
  int round;

  for (round = 0; round < 4; round++)
    {
      scale *= 10000.0;
      result += random_number (10000) / scale;
    }
  return result;
#endif /* not HAVE_DRAND48 */
}
1702 \f
1703 /* Implementation of run_with_timeout, a generic timeout-forcing
1704    routine for systems with Unix-like signal handling.  */
1705
1706 #ifdef USE_SIGNAL_TIMEOUT
# ifdef HAVE_SIGSETJMP
/* Save the jump context, asking sigsetjmp to record the signal mask
   too (second argument 1).  */
#  define SETJMP(env) sigsetjmp (env, 1)

/* Jump context shared by run_with_timeout and the SIGALRM handler.  */
static sigjmp_buf run_with_timeout_env;

/* SIGALRM handler: jump back to the SETJMP point in run_with_timeout,
   making SETJMP return -1.  */
static void
abort_run_with_timeout (int sig)
{
  assert (sig == SIGALRM);
  siglongjmp (run_with_timeout_env, -1);
}
# else /* not HAVE_SIGSETJMP */
#  define SETJMP(env) setjmp (env)

/* Jump context shared by run_with_timeout and the SIGALRM handler.  */
static jmp_buf run_with_timeout_env;

/* SIGALRM handler: jump back to the SETJMP point in run_with_timeout,
   making SETJMP return -1.  */
static void
abort_run_with_timeout (int sig)
{
  int unblocked;

  assert (sig == SIGALRM);

  /* There is no siglongjmp here to preserve the set of blocked
     signals.  Jumping straight out of the handler would leave
     SIGALRM blocked, so remove it from the mask by hand first.  */
  unblocked = siggetmask () & ~sigmask (SIGALRM);
  sigsetmask (unblocked);

  /* With SIGALRM unblocked it is safe to jump.  */
  longjmp (run_with_timeout_env, -1);
}
# endif /* not HAVE_SIGSETJMP */
1738
/* Schedule delivery of SIGALRM TIMEOUT seconds from now, with
   setitimer where ITIMER_REAL is available and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would round
   to zero is bumped to the smallest legal interval instead (1us for
   setitimer, 1s for alarm), which ensures that SIGALRM is delivered
   in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  long whole_secs = (long) timeout;

  xzero (itv);
  itv.it_value.tv_sec = whole_secs;
  itv.it_value.tv_usec = 1000000 * (timeout - whole_secs);
  /* An all-zero value would mean "wait forever", so make sure we
     wait for at least the minimum interval.  */
  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
    itv.it_value.tv_usec = 1;
  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  alarm(0) means "never deliver
     the alarm", i.e. "wait forever", which is not what someone who
     specifies a 0.5s timeout would expect -- so TIMEOUTs smaller
     than 1 are rounded up to 1 rather than down to zero.  */
  int secs = (int) timeout;
  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
1773
/* Disarm the timer armed by alarm_set, using the same interface
   (setitimer or alarm) that alarm_set uses.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* An all-zero itimerval turns the timer off.  */
  struct itimerval zero_timer;
  xzero (zero_timer);
  setitimer (ITIMER_REAL, &zero_timer, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
1787
1788 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1789    seconds.  Returns true if the function was interrupted with a
1790    timeout, false otherwise.
1791
1792    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1793    using setitimer() or alarm().  The timeout is enforced by
1794    longjumping out of the SIGALRM handler.  This has several
1795    advantages compared to the traditional approach of relying on
1796    signals causing system calls to exit with EINTR:
1797
1798      * The callback function is *forcibly* interrupted after the
1799        timeout expires, (almost) regardless of what it was doing and
1800        whether it was in a syscall.  For example, a calculation that
1801        takes a long time is interrupted as reliably as an IO
1802        operation.
1803
1804      * It works with both SYSV and BSD signals because it doesn't
1805        depend on the default setting of SA_RESTART.
1806
1807      * It doesn't require special handler setup beyond a simple call
1808        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1809        optional.)
1810
1811    The only downside is that, if FUN allocates internal resources that
1812    are normally freed prior to exit from the functions, they will be
1813    lost in case of timeout.  */
1814
1815 bool
1816 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1817 {
1818   int saved_errno;
1819
1820   if (timeout == 0)
1821     {
1822       fun (arg);
1823       return false;
1824     }
1825
1826   signal (SIGALRM, abort_run_with_timeout);
1827   if (SETJMP (run_with_timeout_env) != 0)
1828     {
1829       /* Longjumped out of FUN with a timeout. */
1830       signal (SIGALRM, SIG_DFL);
1831       return true;
1832     }
1833   alarm_set (timeout);
1834   fun (arg);
1835
1836   /* Preserve errno in case alarm() or signal() modifies it. */
1837   saved_errno = errno;
1838   alarm_cancel ();
1839   signal (SIGALRM, SIG_DFL);
1840   errno = saved_errno;
1841
1842   return false;
1843 }
1844
1845 #else  /* not USE_SIGNAL_TIMEOUT */
1846
#ifndef WINDOWS
/* Stub run_with_timeout for builds without signal-based timeouts:
   FUN(ARG) is called with no time limit and false ("did not time
   out") is always returned.  It is not defined under Windows,
   because Windows has its own version of run_with_timeout that uses
   threads.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  /* TIMEOUT is accepted for interface compatibility only.  */
  (void) timeout;
  (*fun) (arg);
  return false;
}
#endif /* not WINDOWS */
1859 #endif /* not USE_SIGNAL_TIMEOUT */
1860 \f
1861 #ifndef WINDOWS
1862
/* Pause for SECONDS seconds; the value may have a fractional part.
   On machines without nanosleep(), the pause may end early if a
   signal arrives.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface: it is accurate and, more
     importantly, it reports the unslept time, so the sleep can be
     reliably resumed after a signal such as SIGWINCH.  (There was an
     actual Debian bug report about --limit-rate malfunctioning while
     the terminal was being resized.)  */
  struct timespec request, left;
  long whole = (long) seconds;

  request.tv_sec = whole;
  request.tv_nsec = 1000000000 * (seconds - whole);
  for (;;)
    {
      if (nanosleep (&request, &left) >= 0)
        break;                  /* slept the full period */
      if (errno != EINTR)
        break;                  /* failed for another reason; give up */
      /* Interrupted by a signal: go back to sleep for what is left.  */
      request = left;
    }
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  */
  double fraction = seconds;

  if (seconds >= 1)
    {
      /* On some systems, usleep cannot handle values larger than
         1,000,000.  Sleep the whole seconds with sleep first and
         leave only the subsecond part to usleep.  */
      sleep (seconds);
      fraction = seconds - (long) seconds;
    }
  usleep (fraction * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval delay;
  long whole = (long) seconds;

  delay.tv_sec = whole;
  delay.tv_usec = 1000000 * (seconds - whole);
  select (0, NULL, NULL, NULL, &delay);
  /* If select returns -1 and errno is EINTR, we were interrupted by
     a signal.  Without knowing how long we actually slept we can't
     return to sleep, and tracking sleeps with gettimeofday is slow
     and unreliable due to clock skew.  */
#endif
}
1908
1909 #endif /* not WINDOWS */
1910
/* Encode the LENGTH octets at DATA as base64 and store the text in
   DEST, followed by a terminating zero.  DEST must point to a
   writable buffer of at least 1+BASE64_LENGTH(length) bytes.  The
   function returns the length of the resulting base64 data, not
   counting the terminating zero.  A LENGTH of zero or less yields
   the empty string.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table: the output character for each 6-bit value.  */
  static const char tbl[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  char *p = dest;
  /* Count the octets still to be encoded rather than comparing S
     against an end pointer: DATA + LENGTH - 2 would point before the
     start of the buffer when LENGTH < 2, which is undefined
     behavior.  */
  int left = length;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; left >= 3; s += 3, left -= 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two leftover octets and pad with `='...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[(s[1] & 0xf) << 2];
      *p++ = '=';
      break;
    default:
      /* Nothing left over (or LENGTH was not positive).  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (int) (p - dest);
}
1967
/* Load into C the next character of P that is not whitespace,
   advancing P past it.  C is \0 once the end of the string has been
   reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

/* True if C has its high bit clear, so that it can index a
   128-entry table.  */
#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode the null-terminated base64 text BASE64 and store the
   resulting octets in DEST.  Whitespace in the input is skipped.
   DEST is assumed to be large enough to accommodate the decoded
   data, which is guaranteed to be no more than 3/4*strlen(base64).

   Since DEST is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the number of octets written
   to DEST, or -1 if the base64 input is malformed; in the latter
   case some octets may already have been stored in DEST.

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* 6-bit value of each of the first 128 characters, or -1 for
     characters outside the base64 alphabet (including `=').  Note
     that this assumes ASCII (but so does Wget in other places).  */
  static const signed char sextet_of[128] =
    {
      /*   0- 15 */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /*  16- 31 */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /*  32- 47: `+' is 62, `/' is 63 */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
      /*  48- 63: `0'..`9' */
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      /*  64- 79: `A'..`O' */
      -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
      /*  80- 95: `P'..`Z' */
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
      /*  96-111: `a'..`o' */
      -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 112-127: `p'..`z' */
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
    };
/* 6-bit value of C, or a negative number if C is not a base64 digit
   (this covers NUL, `=' and non-ASCII characters).  */
#define SEXTET(c) (IS_ASCII (c) ? (int) sextet_of[c] : -1)

  const char *in = base64;
  char *out = dest;

  for (;;)
    {
      unsigned char c;
      unsigned long group;
      int bits;

      /* First character of a quadruplet.  This is the only place
         where the input may end.  */
      NEXT_CHAR (c, in);
      if (c == '\0')
        return out - (char *) dest;
      bits = SEXTET (c);
      if (bits < 0)
        return -1;              /* illegal char while decoding base64 */
      group = bits << 18;

      /* Second character; it completes the first output octet.  */
      NEXT_CHAR (c, in);
      bits = SEXTET (c);
      if (bits < 0)
        return -1;              /* premature EOF or illegal char */
      group |= bits << 12;
      *out++ = group >> 16;

      /* Third character: either a digit or the start of `==' padding.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        {
          NEXT_CHAR (c, in);
          if (c != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }
      bits = SEXTET (c);
      if (bits < 0)
        return -1;              /* premature EOF or illegal char */
      group |= bits << 6;
      *out++ = 0xff & group >> 8;

      /* Fourth character: either a digit or a single `=' of padding.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        continue;
      bits = SEXTET (c);
      if (bits < 0)
        return -1;              /* premature EOF or illegal char */
      group |= bits;
      *out++ = 0xff & group;
    }
#undef SEXTET
}

#undef IS_ASCII
#undef NEXT_CHAR
2077 \f
/* Simple merge sort for use by stable_sort.  Sorts the elements of
   BASE with indices FROM through TO (both inclusive), each SIZE
   bytes wide, in the order defined by CMPFUN.  TEMP is scratch
   storage indexed the same way as BASE, so it must have room for at
   least TO + 1 elements.  Elements that compare equal keep their
   relative order.  Implementation courtesy Zeljko Vrba with
   additional debugging by Nenad Barbutov.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way so that the sum FROM + TO cannot wrap
         around.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      /* Merge the two sorted halves into TEMP.  Taking the left
         element on a tie is what makes the sort stable.  */
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* Copy the merged run back over the original range.  */
      memcpy (ELT (base, from), ELT (temp, from), (to - from + 1) * size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort:
   sorts the NMEMB elements of BASE, each SIZE bytes wide, in the
   order defined by CMPFUN, keeping the relative order of elements
   that compare equal.  Uses mergesort internally, allocating
   NMEMB * SIZE bytes of temporary storage with alloca, so it is
   meant for arrays that comfortably fit on the stack.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* Fewer than two elements are already sorted.  The check also
     keeps NMEMB - 1 below from wrapping around when NMEMB is 0.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2123 \f
/* Format NUMBER compactly for display.  Values whose magnitude is at
   least ten are rounded to an integer.  Smaller values are printed
   with a single digit after the point (from about one up to ten) or
   with one significant digit and at most three fractional digits.
   For example, 0.1 is printed as "0.1", 0.035 as "0.04", 0.0091 as
   "0.009", and 0.0003 as simply "0".

   This suits durations: long-running downloads are shown without a
   fractional part, while short ones still retain one significant
   digit.  The result is stored in a static buffer that the next
   call overwrites.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  double magnitude = number >= 0 ? number : -number;
  int frac_digits;

  if (magnitude >= 9.95)
    /* The threshold is 9.95 rather than 10 because %.1f would print
       9.96 as "10.0" instead of "10".  OTOH 9.94 still prints as
       "9.9".  */
    frac_digits = 0;
  else if (magnitude >= 0.95)
    frac_digits = 1;
  else if (magnitude >= 0.001)
    {
      /* One significant digit, no trailing zeros.  */
      snprintf (buf, sizeof buf, "%.1g", number);
      return buf;
    }
  else if (magnitude >= 0.0005)
    /* round [0.0005, 0.001) to 0.001 */
    frac_digits = 3;
  else
    {
      /* print numbers close to 0 as 0, not 0.000 */
      strcpy (buf, "0");
      return buf;
    }

  snprintf (buf, sizeof buf, "%.*f", frac_digits, number);
  return buf;
}
2158
2159 #ifdef TESTING
2160
/* Unit test for subdir_p: calls it on each pair of directories in a
   table and checks the result against the expected value with
   mu_assert.  Returns NULL once the end of the table is reached.  */
const char *
test_subdir_p()
{
  struct subdir_case {
    char *d1;
    char *d2;
    bool result;
  };
  struct subdir_case cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  struct subdir_case *tc;

  for (tc = cases; tc < cases + countof (cases); ++tc)
    {
      bool got = subdir_p (tc->d1, tc->d2);

      mu_assert ("test_subdir_p: wrong result",
                 got == tc->result);
    }

  return NULL;
}
2185
/* Unit test for dir_matches_p: calls it with each NULL-terminated
   pattern list and directory name in a table and checks the result
   against the expected value with mu_assert.  Returns NULL once the
   end of the table is reached.  */
const char *
test_dir_matches_p()
{
  struct dir_case {
    char *dirlist[3];
    char *dir;
    bool result;
  };
  struct dir_case cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
  };
  struct dir_case *tc;

  for (tc = cases; tc < cases + countof (cases); ++tc)
    {
      bool got = dir_matches_p (tc->dirlist, tc->dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == tc->result);
    }

  return NULL;
}
2220
2221 #endif /* TESTING */
2222