sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   3    2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 #include "wget.h"
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #include <time.h>
  37 #ifdef HAVE_SYS_TIME_H
  38 # include <sys/time.h>
  39 #endif
  40 #ifdef HAVE_UNISTD_H
  41 # include <unistd.h>
  42 #endif
  43 #ifdef HAVE_MMAP
  44 # include <sys/mman.h>
  45 #endif
  46 #ifdef HAVE_PROCESS_H
  47 # include <process.h>  /* getpid() */
  48 #endif
  49 #ifdef HAVE_UTIME_H
  50 # include <utime.h>
  51 #endif
  52 #ifdef HAVE_SYS_UTIME_H
  53 # include <sys/utime.h>
  54 #endif
  55 #include <errno.h>
  56 #include <fcntl.h>
  57 #include <assert.h>
  58 #include <stdarg.h>
  59 #include <locale.h>
  60
  61 /* For TIOCGWINSZ and friends: */
  62 #ifdef HAVE_SYS_IOCTL_H
  63 # include <sys/ioctl.h>
  64 #endif
  65 #ifdef HAVE_TERMIOS_H
  66 # include <termios.h>
  67 #endif
  68
  69 /* Needed for Unix version of run_with_timeout. */
  70 #include <signal.h>
  71 #include <setjmp.h>
  72
  73 #ifndef HAVE_SIGSETJMP
  74 /* If sigsetjmp is a macro, configure won't pick it up. */
  75 # ifdef sigsetjmp
  76 #  define HAVE_SIGSETJMP
  77 # endif
  78 #endif
  79
  80 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  81 # define USE_SIGNAL_TIMEOUT
  82 #endif
  83
  84 #include "utils.h"
  85 #include "hash.h"
  86
  87 #ifdef TESTING
  88 #include "test.h"
  89 #endif
  90
  91 /* Utility function: like xstrdup(), but also lowercases S.  */
  92
  93 char *
  94 xstrdup_lower (const char *s)
  95 {
  96   char *copy = xstrdup (s);
  97   char *p = copy;
  98   for (; *p; p++)
  99     *p = c_tolower (*p);
 100   return copy;
 101 }
 102
 103 /* Copy the string formed by two pointers (one on the beginning, other
 104    on the char after the last char) to a new, malloc-ed location.
 105    0-terminate it.  */
 106 char *
 107 strdupdelim (const char *beg, const char *end)
 108 {
 109   char *res = xmalloc (end - beg + 1);
 110   memcpy (res, beg, end - beg);
 111   res[end - beg] = '\0';
 112   return res;
 113 }
 114
 115 /* Parse a string containing comma-separated elements, and return a
 116    vector of char pointers with the elements.  Spaces following the
 117    commas are ignored.  */
 118 char **
 119 sepstring (const char *s)
 120 {
 121   char **res;
 122   const char *p;
 123   int i = 0;
 124
 125   if (!s || !*s)
 126     return NULL;
 127   res = NULL;
 128   p = s;
 129   while (*s)
 130     {
 131       if (*s == ',')
 132         {
 133           res = xrealloc (res, (i + 2) * sizeof (char *));
 134           res[i] = strdupdelim (p, s);
 135           res[++i] = NULL;
 136           ++s;
 137           /* Skip the blanks following the ','.  */
 138           while (c_isspace (*s))
 139             ++s;
 140           p = s;
 141         }
 142       else
 143         ++s;
 144     }
 145   res = xrealloc (res, (i + 2) * sizeof (char *));
 146   res[i] = strdupdelim (p, s);
 147   res[i + 1] = NULL;
 148   return res;
 149 }
 150 \f
 151 /* Like sprintf, but prints into a string of sufficient size freshly
 152    allocated with malloc, which is returned.  If unable to print due
 153    to invalid format, returns NULL.  Inability to allocate needed
 154    memory results in abort, as with xmalloc.  This is in spirit
 155    similar to the GNU/BSD extension asprintf, but somewhat easier to
 156    use.
 157
 158    Internally the function either calls vasprintf or loops around
 159    vsnprintf until the correct size is found.  Since Wget also ships a
 160    fallback implementation of vsnprintf, this should be portable.  */
 161
 162 /* Constant is using for limits memory allocation for text buffer.
 163    Applicable in situation when: vasprintf is not available in the system
 164    and vsnprintf return -1 when long line is truncated (in old versions of
 165    glibc and in other system where C99 doesn`t support) */
 166
 167 #define FMT_MAX_LENGTH 1048576
 168
 169 char *
 170 aprintf (const char *fmt, ...)
 171 {
 172 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 173   /* Use vasprintf. */
 174   int ret;
 175   va_list args;
 176   char *str;
 177   va_start (args, fmt);
 178   ret = vasprintf (&str, fmt, args);
 179   va_end (args);
 180   if (ret < 0 && errno == ENOMEM)
 181     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 182                                                       with xmalloc/xrealloc */
 183   else if (ret < 0)
 184     return NULL;
 185   return str;
 186 #else  /* not HAVE_VASPRINTF */
 187
 188   /* vasprintf is unavailable.  snprintf into a small buffer and
 189      resize it as necessary. */
 190   int size = 32;
 191   char *str = xmalloc (size);
 192
 193   /* #### This code will infloop and eventually abort in xrealloc if
 194      passed a FMT that causes snprintf to consistently return -1.  */
 195
 196   while (1)
 197     {
 198       int n;
 199       va_list args;
 200
 201       va_start (args, fmt);
 202       n = vsnprintf (str, size, fmt, args);
 203       va_end (args);
 204
 205       /* If the printing worked, return the string. */
 206       if (n > -1 && n < size)
 207         return str;
 208
 209       /* Else try again with a larger buffer. */
 210       if (n > -1)               /* C99 */
 211         size = n + 1;           /* precisely what is needed */
 212       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 213         {                               /* maybe we have some wrong
 214                                            format string? */
 215           logprintf (LOG_ALWAYS,
 216                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 217                        "aborting.\n"),
 218                      exec_name, size);  /* printout a log message */
 219           abort ();                     /* and abort... */
 220         }
 221       else
 222         {
 223           /* else, we continue to grow our
 224            * buffer: Twice the old size. */
 225           size <<= 1;
 226         }
 227       str = xrealloc (str, size);
 228     }
 229 #endif /* not HAVE_VASPRINTF */
 230 }
 231
 232 /* Concatenate the NULL-terminated list of string arguments into
 233    freshly allocated space.  */
 234
 235 char *
 236 concat_strings (const char *str0, ...)
 237 {
 238   va_list args;
 239   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 240   char *ret, *p;
 241
 242   const char *next_str;
 243   int total_length = 0;
 244   int argcount;
 245
 246   /* Calculate the length of and allocate the resulting string. */
 247
 248   argcount = 0;
 249   va_start (args, str0);
 250   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 251     {
 252       int len = strlen (next_str);
 253       if (argcount < countof (saved_lengths))
 254         saved_lengths[argcount++] = len;
 255       total_length += len;
 256     }
 257   va_end (args);
 258   p = ret = xmalloc (total_length + 1);
 259
 260   /* Copy the strings into the allocated space. */
 261
 262   argcount = 0;
 263   va_start (args, str0);
 264   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 265     {
 266       int len;
 267       if (argcount < countof (saved_lengths))
 268         len = saved_lengths[argcount++];
 269       else
 270         len = strlen (next_str);
 271       memcpy (p, next_str, len);
 272       p += len;
 273     }
 274   va_end (args);
 275   *p = '\0';
 276
 277   return ret;
 278 }
 279 \f
 280 /* Format the provided time according to the specified format.  The
 281    format is a string with format elements supported by strftime.  */
 282
 283 static char *
 284 fmttime (time_t t, const char *fmt)
 285 {
 286   static char output[32];
 287   struct tm *tm = localtime(&t);
 288   if (!tm)
 289     abort ();
 290   if (!strftime(output, sizeof(output), fmt, tm))
 291     abort ();
 292   return output;
 293 }
 294
 295 /* Return pointer to a static char[] buffer in which zero-terminated
 296    string-representation of TM (in form hh:mm:ss) is printed.
 297
 298    If TM is NULL, the current time will be used.  */
 299
 300 char *
 301 time_str (time_t t)
 302 {
 303   return fmttime(t, "%H:%M:%S");
 304 }
 305
 306 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 307
 308 char *
 309 datetime_str (time_t t)
 310 {
 311   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 312 }
 313 \f
 314 /* The Windows versions of the following two functions are defined in
 315    mswindows.c. On MSDOS this function should never be called. */
 316
 317 #if !defined(WINDOWS) && !defined(MSDOS)
 318 void
 319 fork_to_background (void)
 320 {
 321   pid_t pid;
 322   /* Whether we arrange our own version of opt.lfilename here.  */
 323   bool logfile_changed = false;
 324
 325   if (opt.quiet && !opt.server_response)
 326     {
 327       /* Don't bother with a logfile, there are virtually no logs we
 328          issue in quiet mode. (Server responses in FTP are the
 329          exception, when enabled.) */
 330       log_close ();
 331     }
 332   if (!opt.lfilename)
 333     {
 334       /* We must create the file immediately to avoid either a race
 335          condition (which arises from using unique_name and failing to
 336          use fopen_excl) or lying to the user about the log file name
 337          (which arises from using unique_name, printing the name, and
 338          using fopen_excl later on.)  */
 339       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 340       if (new_log_fp)
 341         {
 342           logfile_changed = true;
 343           fclose (new_log_fp);
 344         }
 345     }
 346   pid = fork ();
 347   if (pid < 0)
 348     {
 349       /* parent, error */
 350       perror ("fork");
 351       exit (1);
 352     }
 353   else if (pid != 0)
 354     {
 355       /* parent, no error */
 356       if (!quiet)
 357         printf (_("Continuing in background, pid %d.\n"), (int) pid);
 358       if (logfile_changed)
 359         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 360       exit (0);                 /* #### should we use _exit()? */
 361     }
 362
 363   /* child: give up the privileges and keep running. */
 364   setsid ();
 365   freopen ("/dev/null", "r", stdin);
 366   freopen ("/dev/null", "w", stdout);
 367   freopen ("/dev/null", "w", stderr);
 368 }
 369 #endif /* !WINDOWS && !MSDOS */
 370 \f
 371 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 372    specified with TM.  The atime ("access time") is set to the current
 373    time.  */
 374
 375 void
 376 touch (const char *file, time_t tm)
 377 {
 378 #ifdef HAVE_STRUCT_UTIMBUF
 379   struct utimbuf times;
 380 #else
 381   struct {
 382     time_t actime;
 383     time_t modtime;
 384   } times;
 385 #endif
 386   times.modtime = tm;
 387   times.actime = time (NULL);
 388   if (utime (file, &times) == -1)
 389     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 390 }
 391
 392 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 393    nothing under MS-Windows.  */
 394 int
 395 remove_link (const char *file)
 396 {
 397   int err = 0;
 398   struct_stat st;
 399
 400   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 401     {
 402       DEBUGP (("Unlinking %s (symlink).\n", file));
 403       err = unlink (file);
 404       if (err != 0)
 405         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 406                    file, strerror (errno));
 407     }
 408   return err;
 409 }
 410
 411 /* Does FILENAME exist?  This is quite a lousy implementation, since
 412    it supplies no error codes -- only a yes-or-no answer.  Thus it
 413    will return that a file does not exist if, e.g., the directory is
 414    unreadable.  I don't mind it too much currently, though.  The
 415    proper way should, of course, be to have a third, error state,
 416    other than true/false, but that would introduce uncalled-for
 417    additional complexity to the callers.  */
 418 bool
 419 file_exists_p (const char *filename)
 420 {
 421 #ifdef HAVE_ACCESS
 422   return access (filename, F_OK) >= 0;
 423 #else
 424   struct_stat buf;
 425   return stat (filename, &buf) >= 0;
 426 #endif
 427 }
 428
 429 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 430    Returns 0 on error.  */
 431 bool
 432 file_non_directory_p (const char *path)
 433 {
 434   struct_stat buf;
 435   /* Use lstat() rather than stat() so that symbolic links pointing to
 436      directories can be identified correctly.  */
 437   if (lstat (path, &buf) != 0)
 438     return false;
 439   return S_ISDIR (buf.st_mode) ? false : true;
 440 }
 441
 442 /* Return the size of file named by FILENAME, or -1 if it cannot be
 443    opened or seeked into. */
 444 wgint
 445 file_size (const char *filename)
 446 {
 447 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 448   wgint size;
 449   /* We use fseek rather than stat to determine the file size because
 450      that way we can also verify that the file is readable without
 451      explicitly checking for permissions.  Inspired by the POST patch
 452      by Arnaud Wylie.  */
 453   FILE *fp = fopen (filename, "rb");
 454   if (!fp)
 455     return -1;
 456   fseeko (fp, 0, SEEK_END);
 457   size = ftello (fp);
 458   fclose (fp);
 459   return size;
 460 #else
 461   struct_stat st;
 462   if (stat (filename, &st) < 0)
 463     return -1;
 464   return st.st_size;
 465 #endif
 466 }
 467
 468 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 469    doesn't exist is found.  Return a freshly allocated copy of the
 470    unused file name.  */
 471
 472 static char *
 473 unique_name_1 (const char *prefix)
 474 {
 475   int count = 1;
 476   int plen = strlen (prefix);
 477   char *template = (char *)alloca (plen + 1 + 24);
 478   char *template_tail = template + plen;
 479
 480   memcpy (template, prefix, plen);
 481   *template_tail++ = '.';
 482
 483   do
 484     number_to_string (template_tail, count++);
 485   while (file_exists_p (template));
 486
 487   return xstrdup (template);
 488 }
 489
 490 /* Return a unique file name, based on FILE.
 491
 492    More precisely, if FILE doesn't exist, it is returned unmodified.
 493    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 494    file name that doesn't exist is returned.
 495
 496    The resulting file is not created, only verified that it didn't
 497    exist at the point in time when the function was called.
 498    Therefore, where security matters, don't rely that the file created
 499    by this function exists until you open it with O_EXCL or
 500    equivalent.
 501
 502    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 503    string.  Otherwise, it may return FILE if the file doesn't exist
 504    (and therefore doesn't need changing).  */
 505
 506 char *
 507 unique_name (const char *file, bool allow_passthrough)
 508 {
 509   /* If the FILE itself doesn't exist, return it without
 510      modification. */
 511   if (!file_exists_p (file))
 512     return allow_passthrough ? (char *)file : xstrdup (file);
 513
 514   /* Otherwise, find a numeric suffix that results in unused file name
 515      and return it.  */
 516   return unique_name_1 (file);
 517 }
 518
 519 /* Create a file based on NAME, except without overwriting an existing
 520    file with that name.  Providing O_EXCL is correctly implemented,
 521    this function does not have the race condition associated with
 522    opening the file returned by unique_name.  */
 523
 524 FILE *
 525 unique_create (const char *name, bool binary, char **opened_name)
 526 {
 527   /* unique file name, based on NAME */
 528   char *uname = unique_name (name, false);
 529   FILE *fp;
 530   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 531     {
 532       xfree (uname);
 533       uname = unique_name (name, false);
 534     }
 535   if (opened_name && fp != NULL)
 536     {
 537       if (fp)
 538         *opened_name = uname;
 539       else
 540         {
 541           *opened_name = NULL;
 542           xfree (uname);
 543         }
 544     }
 545   else
 546     xfree (uname);
 547   return fp;
 548 }
 549
 550 /* Open the file for writing, with the addition that the file is
 551    opened "exclusively".  This means that, if the file already exists,
 552    this function will *fail* and errno will be set to EEXIST.  If
 553    BINARY is set, the file will be opened in binary mode, equivalent
 554    to fopen's "wb".
 555
 556    If opening the file fails for any reason, including the file having
 557    previously existed, this function returns NULL and sets errno
 558    appropriately.  */
 559
 560 FILE *
 561 fopen_excl (const char *fname, bool binary)
 562 {
 563   int fd;
 564 #ifdef O_EXCL
 565   int flags = O_WRONLY | O_CREAT | O_EXCL;
 566 # ifdef O_BINARY
 567   if (binary)
 568     flags |= O_BINARY;
 569 # endif
 570   fd = open (fname, flags, 0666);
 571   if (fd < 0)
 572     return NULL;
 573   return fdopen (fd, binary ? "wb" : "w");
 574 #else  /* not O_EXCL */
 575   /* Manually check whether the file exists.  This is prone to race
 576      conditions, but systems without O_EXCL haven't deserved
 577      better.  */
 578   if (file_exists_p (fname))
 579     {
 580       errno = EEXIST;
 581       return NULL;
 582     }
 583   return fopen (fname, binary ? "wb" : "w");
 584 #endif /* not O_EXCL */
 585 }
 586 \f
 587 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 588    are missing, create them first.  In case any mkdir() call fails,
 589    return its error status.  Returns 0 on successful completion.
 590
 591    The behaviour of this function should be identical to the behaviour
 592    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 593 int
 594 make_directory (const char *directory)
 595 {
 596   int i, ret, quit = 0;
 597   char *dir;
 598
 599   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 600      function is unsafe if called with a read-only char *argument.  */
 601   STRDUP_ALLOCA (dir, directory);
 602
 603   /* If the first character of dir is '/', skip it (and thus enable
 604      creation of absolute-pathname directories.  */
 605   for (i = (*dir == '/'); 1; ++i)
 606     {
 607       for (; dir[i] && dir[i] != '/'; i++)
 608         ;
 609       if (!dir[i])
 610         quit = 1;
 611       dir[i] = '\0';
 612       /* Check whether the directory already exists.  Allow creation of
 613          of intermediate directories to fail, as the initial path components
 614          are not necessarily directories!  */
 615       if (!file_exists_p (dir))
 616         ret = mkdir (dir, 0777);
 617       else
 618         ret = 0;
 619       if (quit)
 620         break;
 621       else
 622         dir[i] = '/';
 623     }
 624   return ret;
 625 }
 626
 627 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 628    should be a file name.
 629
 630    file_merge("/foo/bar", "baz")  => "/foo/baz"
 631    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 632    file_merge("foo", "bar")       => "bar"
 633
 634    In other words, it's a simpler and gentler version of uri_merge.  */
 635
 636 char *
 637 file_merge (const char *base, const char *file)
 638 {
 639   char *result;
 640   const char *cut = (const char *)strrchr (base, '/');
 641
 642   if (!cut)
 643     return xstrdup (file);
 644
 645   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 646   memcpy (result, base, cut - base);
 647   result[cut - base] = '/';
 648   strcpy (result + (cut - base) + 1, file);
 649
 650   return result;
 651 }
 652 \f
 653 /* Like fnmatch, but performs a case-insensitive match.  */
 654
 655 int
 656 fnmatch_nocase (const char *pattern, const char *string, int flags)
 657 {
 658 #ifdef FNM_CASEFOLD
 659   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 660      also present on *BSD platforms, and possibly elsewhere.  */
 661   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 662 #else
 663   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 664   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 665   char *strcopy = (char *) alloca (strlen (string) + 1);
 666   char *p;
 667   for (p = patcopy; *pattern; pattern++, p++)
 668     *p = c_tolower (*pattern);
 669   *p = '\0';
 670   for (p = strcopy; *string; string++, p++)
 671     *p = c_tolower (*string);
 672   *p = '\0';
 673   return fnmatch (patcopy, strcopy, flags);
 674 #endif
 675 }
 676
 677 static bool in_acclist (const char *const *, const char *, bool);
 678
 679 /* Determine whether a file is acceptable to be followed, according to
 680    lists of patterns to accept/reject.  */
 681 bool
 682 acceptable (const char *s)
 683 {
 684   int l = strlen (s);
 685
 686   while (l && s[l] != '/')
 687     --l;
 688   if (s[l] == '/')
 689     s += (l + 1);
 690   if (opt.accepts)
 691     {
 692       if (opt.rejects)
 693         return (in_acclist ((const char *const *)opt.accepts, s, true)
 694                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 695       else
 696         return in_acclist ((const char *const *)opt.accepts, s, true);
 697     }
 698   else if (opt.rejects)
 699     return !in_acclist ((const char *const *)opt.rejects, s, true);
 700   return true;
 701 }
 702
 703 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 704    will return true if and only if D2 begins with `/something/' or is exactly
 705    '/something'.  */
 706 bool
 707 subdir_p (const char *d1, const char *d2)
 708 {
 709   if (*d1 == '\0')
 710     return true;
 711   if (!opt.ignore_case)
 712     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 713       ;
 714   else
 715     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 716       ;
 717
 718   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 719 }
 720
 721 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 722    first element that matches DIR, through wildcards or front comparison (as
 723    appropriate).  */
 724 static bool
 725 dir_matches_p (char **dirlist, const char *dir)
 726 {
 727   char **x;
 728   int (*matcher) (const char *, const char *, int)
 729     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 730
 731   for (x = dirlist; *x; x++)
 732     {
 733       /* Remove leading '/' */
 734       char *p = *x + (**x == '/');
 735       if (has_wildcards_p (p))
 736         {
 737           if (matcher (p, dir, FNM_PATHNAME) == 0)
 738             break;
 739         }
 740       else
 741         {
 742           if (subdir_p (p, dir))
 743             break;
 744         }
 745     }
 746
 747   return *x ? true : false;
 748 }
 749
 750 /* Returns whether DIRECTORY is acceptable for download, wrt the
 751    include/exclude lists.
 752
 753    The leading `/' is ignored in paths; relative and absolute paths
 754    may be freely intermixed.  */
 755
 756 bool
 757 accdir (const char *directory)
 758 {
 759   /* Remove starting '/'.  */
 760   if (*directory == '/')
 761     ++directory;
 762   if (opt.includes)
 763     {
 764       if (!dir_matches_p (opt.includes, directory))
 765         return false;
 766     }
 767   if (opt.excludes)
 768     {
 769       if (dir_matches_p (opt.excludes, directory))
 770         return false;
 771     }
 772   return true;
 773 }
 774
 775 /* Return true if STRING ends with TAIL.  For instance:
 776
 777    match_tail ("abc", "bc", false)  -> 1
 778    match_tail ("abc", "ab", false)  -> 0
 779    match_tail ("abc", "abc", false) -> 1
 780
 781    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 782
 783 bool
 784 match_tail (const char *string, const char *tail, bool fold_case)
 785 {
 786   int i, j;
 787
 788   /* We want this to be fast, so we code two loops, one with
 789      case-folding, one without. */
 790
 791   if (!fold_case)
 792     {
 793       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 794         if (string[i] != tail[j])
 795           break;
 796     }
 797   else
 798     {
 799       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 800         if (c_tolower (string[i]) != c_tolower (tail[j]))
 801           break;
 802     }
 803
 804   /* If the tail was exhausted, the match was succesful.  */
 805   if (j == -1)
 806     return true;
 807   else
 808     return false;
 809 }
 810
 811 /* Checks whether string S matches each element of ACCEPTS.  A list
 812    element are matched either with fnmatch() or match_tail(),
 813    according to whether the element contains wildcards or not.
 814
 815    If the BACKWARD is false, don't do backward comparison -- just compare
 816    them normally.  */
 817 static bool
 818 in_acclist (const char *const *accepts, const char *s, bool backward)
 819 {
 820   for (; *accepts; accepts++)
 821     {
 822       if (has_wildcards_p (*accepts))
 823         {
 824           int res = opt.ignore_case
 825             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 826           /* fnmatch returns 0 if the pattern *does* match the string.  */
 827           if (res == 0)
 828             return true;
 829         }
 830       else
 831         {
 832           if (backward)
 833             {
 834               if (match_tail (s, *accepts, opt.ignore_case))
 835                 return true;
 836             }
 837           else
 838             {
 839               int cmp = opt.ignore_case
 840                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 841               if (cmp == 0)
 842                 return true;
 843             }
 844         }
 845     }
 846   return false;
 847 }
 848
 849 /* Return the location of STR's suffix (file extension).  Examples:
 850    suffix ("foo.bar")       -> "bar"
 851    suffix ("foo.bar.baz")   -> "baz"
 852    suffix ("/foo/bar")      -> NULL
 853    suffix ("/foo.bar/baz")  -> NULL  */
 854 char *
 855 suffix (const char *str)
 856 {
 857   int i;
 858
 859   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 860     ;
 861
 862   if (str[i++] == '.')
 863     return (char *)str + i;
 864   else
 865     return NULL;
 866 }
 867
 868 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 869    `]').  */
 870
 871 bool
 872 has_wildcards_p (const char *s)
 873 {
 874   for (; *s; s++)
 875     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 876       return true;
 877   return false;
 878 }
 879
 880 /* Return true if FNAME ends with a typical HTML suffix.  The
 881    following (case-insensitive) suffixes are presumed to be HTML
 882    files:
 883
 884      html
 885      htm
 886      ?html (`?' matches one character)
 887
 888    #### CAVEAT.  This is not necessarily a good indication that FNAME
 889    refers to a file that contains HTML!  */
 890 bool
 891 has_html_suffix_p (const char *fname)
 892 {
 893   char *suf;
 894
 895   if ((suf = suffix (fname)) == NULL)
 896     return false;
 897   if (!strcasecmp (suf, "html"))
 898     return true;
 899   if (!strcasecmp (suf, "htm"))
 900     return true;
 901   if (suf[0] && !strcasecmp (suf + 1, "html"))
 902     return true;
 903   return false;
 904 }
 905
 906 /* Read a line from FP and return the pointer to freshly allocated
 907    storage.  The storage space is obtained through malloc() and should
 908    be freed with free() when it is no longer needed.
 909
 910    The length of the line is not limited, except by available memory.
 911    The newline character at the end of line is retained.  The line is
 912    terminated with a zero character.
 913
 914    After end-of-file is encountered without anything being read, NULL
 915    is returned.  NULL is also returned on error.  To distinguish
 916    between these two cases, use the stdio function ferror().  */
 917
 918 char *
 919 read_whole_line (FILE *fp)
 920 {
 921   int length = 0;
 922   int bufsize = 82;
 923   char *line = xmalloc (bufsize);
 924
 925   while (fgets (line + length, bufsize - length, fp))
 926     {
 927       length += strlen (line + length);
 928       if (length == 0)
 929         /* Possible for example when reading from a binary file where
 930            a line begins with \0.  */
 931         continue;
 932
 933       if (line[length - 1] == '\n')
 934         break;
 935
 936       /* fgets() guarantees to read the whole line, or to use up the
 937          space we've given it.  We can double the buffer
 938          unconditionally.  */
 939       bufsize <<= 1;
 940       line = xrealloc (line, bufsize);
 941     }
 942   if (length == 0 || ferror (fp))
 943     {
 944       xfree (line);
 945       return NULL;
 946     }
 947   if (length + 1 < bufsize)
 948     /* Relieve the memory from our exponential greediness.  We say
 949        `length + 1' because the terminating \0 is not included in
 950        LENGTH.  We don't need to zero-terminate the string ourselves,
 951        though, because fgets() does that.  */
 952     line = xrealloc (line, length + 1);
 953   return line;
 954 }
 955 \f
 956 /* Read FILE into memory.  A pointer to `struct file_memory' are
 957    returned; use struct element `content' to access file contents, and
 958    the element `length' to know the file length.  `content' is *not*
 959    zero-terminated, and you should *not* read or write beyond the [0,
 960    length) range of characters.
 961
 962    After you are done with the file contents, call read_file_free to
 963    release the memory.
 964
 965    Depending on the operating system and the type of file that is
 966    being read, read_file() either mmap's the file into memory, or
 967    reads the file into the core using read().
 968
 969    If file is named "-", fileno(stdin) is used for reading instead.
 970    If you want to read from a real file named "-", use "./-" instead.  */
 971
 972 struct file_memory *
 973 read_file (const char *file)
 974 {
 975   int fd;
 976   struct file_memory *fm;
 977   long size;
 978   bool inhibit_close = false;
 979
 980   /* Some magic in the finest tradition of Perl and its kin: if FILE
 981      is "-", just use stdin.  */
 982   if (HYPHENP (file))
 983     {
 984       fd = fileno (stdin);
 985       inhibit_close = true;
 986       /* Note that we don't inhibit mmap() in this case.  If stdin is
 987          redirected from a regular file, mmap() will still work.  */
 988     }
 989   else
 990     fd = open (file, O_RDONLY);
 991   if (fd < 0)
 992     return NULL;
 993   fm = xnew (struct file_memory);
 994
 995 #ifdef HAVE_MMAP
 996   {
 997     struct_fstat buf;
 998     if (fstat (fd, &buf) < 0)
 999       goto mmap_lose;
1000     fm->length = buf.st_size;
1001     /* NOTE: As far as I know, the callers of this function never
1002        modify the file text.  Relying on this would enable us to
1003        specify PROT_READ and MAP_SHARED for a marginal gain in
1004        efficiency, but at some cost to generality.  */
1005     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1006                         MAP_PRIVATE, fd, 0);
1007     if (fm->content == (char *)MAP_FAILED)
1008       goto mmap_lose;
1009     if (!inhibit_close)
1010       close (fd);
1011
1012     fm->mmap_p = 1;
1013     return fm;
1014   }
1015
1016  mmap_lose:
1017   /* The most common reason why mmap() fails is that FD does not point
1018      to a plain file.  However, it's also possible that mmap() doesn't
1019      work for a particular type of file.  Therefore, whenever mmap()
1020      fails, we just fall back to the regular method.  */
1021 #endif /* HAVE_MMAP */
1022
1023   fm->length = 0;
1024   size = 512;                   /* number of bytes fm->contents can
1025                                    hold at any given time. */
1026   fm->content = xmalloc (size);
1027   while (1)
1028     {
1029       wgint nread;
1030       if (fm->length > size / 2)
1031         {
1032           /* #### I'm not sure whether the whole exponential-growth
1033              thing makes sense with kernel read.  On Linux at least,
1034              read() refuses to read more than 4K from a file at a
1035              single chunk anyway.  But other Unixes might optimize it
1036              better, and it doesn't *hurt* anything, so I'm leaving
1037              it.  */
1038
1039           /* Normally, we grow SIZE exponentially to make the number
1040              of calls to read() and realloc() logarithmic in relation
1041              to file size.  However, read() can read an amount of data
1042              smaller than requested, and it would be unreasonable to
1043              double SIZE every time *something* was read.  Therefore,
1044              we double SIZE only when the length exceeds half of the
1045              entire allocated size.  */
1046           size <<= 1;
1047           fm->content = xrealloc (fm->content, size);
1048         }
1049       nread = read (fd, fm->content + fm->length, size - fm->length);
1050       if (nread > 0)
1051         /* Successful read. */
1052         fm->length += nread;
1053       else if (nread < 0)
1054         /* Error. */
1055         goto lose;
1056       else
1057         /* EOF */
1058         break;
1059     }
1060   if (!inhibit_close)
1061     close (fd);
1062   if (size > fm->length && fm->length != 0)
1063     /* Due to exponential growth of fm->content, the allocated region
1064        might be much larger than what is actually needed.  */
1065     fm->content = xrealloc (fm->content, fm->length);
1066   fm->mmap_p = 0;
1067   return fm;
1068
1069  lose:
1070   if (!inhibit_close)
1071     close (fd);
1072   xfree (fm->content);
1073   xfree (fm);
1074   return NULL;
1075 }
1076
1077 /* Release the resources held by FM.  Specifically, this calls
1078    munmap() or xfree() on fm->content, depending whether mmap or
1079    malloc/read were used to read in the file.  It also frees the
1080    memory needed to hold the FM structure itself.  */
1081
1082 void
1083 read_file_free (struct file_memory *fm)
1084 {
1085 #ifdef HAVE_MMAP
1086   if (fm->mmap_p)
1087     {
1088       munmap (fm->content, fm->length);
1089     }
1090   else
1091 #endif
1092     {
1093       xfree (fm->content);
1094     }
1095   xfree (fm);
1096 }
1097 \f
1098 /* Free the pointers in a NULL-terminated vector of pointers, then
1099    free the pointer itself.  */
1100 void
1101 free_vec (char **vec)
1102 {
1103   if (vec)
1104     {
1105       char **p = vec;
1106       while (*p)
1107         xfree (*p++);
1108       xfree (vec);
1109     }
1110 }
1111
1112 /* Append vector V2 to vector V1.  The function frees V2 and
1113    reallocates V1 (thus you may not use the contents of neither
1114    pointer after the call).  If V1 is NULL, V2 is returned.  */
1115 char **
1116 merge_vecs (char **v1, char **v2)
1117 {
1118   int i, j;
1119
1120   if (!v1)
1121     return v2;
1122   if (!v2)
1123     return v1;
1124   if (!*v2)
1125     {
1126       /* To avoid j == 0 */
1127       xfree (v2);
1128       return v1;
1129     }
1130   /* Count v1.  */
1131   for (i = 0; v1[i]; i++)
1132     ;
1133   /* Count v2.  */
1134   for (j = 0; v2[j]; j++)
1135     ;
1136   /* Reallocate v1.  */
1137   v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1138   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1139   xfree (v2);
1140   return v1;
1141 }
1142
1143 /* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1144    is allocated as needed.  Return the new value of the vector. */
1145
1146 char **
1147 vec_append (char **vec, const char *str)
1148 {
1149   int cnt;                      /* count of vector elements, including
1150                                    the one we're about to append */
1151   if (vec != NULL)
1152     {
1153       for (cnt = 0; vec[cnt]; cnt++)
1154         ;
1155       ++cnt;
1156     }
1157   else
1158     cnt = 1;
1159   /* Reallocate the array to fit the new element and the NULL. */
1160   vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1161   /* Append a copy of STR to the vector. */
1162   vec[cnt - 1] = xstrdup (str);
1163   vec[cnt] = NULL;
1164   return vec;
1165 }
1166 \f
1167 /* Sometimes it's useful to create "sets" of strings, i.e. special
1168    hash tables where you want to store strings as keys and merely
1169    query for their existence.  Here is a set of utility routines that
1170    makes that transparent.  */
1171
1172 void
1173 string_set_add (struct hash_table *ht, const char *s)
1174 {
1175   /* First check whether the set element already exists.  If it does,
1176      do nothing so that we don't have to free() the old element and
1177      then strdup() a new one.  */
1178   if (hash_table_contains (ht, s))
1179     return;
1180
1181   /* We use "1" as value.  It provides us a useful and clear arbitrary
1182      value, and it consumes no memory -- the pointers to the same
1183      string "1" will be shared by all the key-value pairs in all `set'
1184      hash tables.  */
1185   hash_table_put (ht, xstrdup (s), "1");
1186 }
1187
1188 /* Synonym for hash_table_contains... */
1189
1190 int
1191 string_set_contains (struct hash_table *ht, const char *s)
1192 {
1193   return hash_table_contains (ht, s);
1194 }
1195
1196 /* Convert the specified string set to array.  ARRAY should be large
1197    enough to hold hash_table_count(ht) char pointers.  */
1198
1199 void string_set_to_array (struct hash_table *ht, char **array)
1200 {
1201   hash_table_iterator iter;
1202   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1203     *array++ = iter.key;
1204 }
1205
1206 /* Free the string set.  This frees both the storage allocated for
1207    keys and the actual hash table.  (hash_table_destroy would only
1208    destroy the hash table.)  */
1209
1210 void
1211 string_set_free (struct hash_table *ht)
1212 {
1213   hash_table_iterator iter;
1214   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1215     xfree (iter.key);
1216   hash_table_destroy (ht);
1217 }
1218
1219 /* Utility function: simply call xfree() on all keys and values of HT.  */
1220
1221 void
1222 free_keys_and_values (struct hash_table *ht)
1223 {
1224   hash_table_iterator iter;
1225   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1226     {
1227       xfree (iter.key);
1228       xfree (iter.value);
1229     }
1230 }
1231 \f
1232 /* Get digit grouping data for thousand separors by calling
1233    localeconv().  The data includes separator string and grouping info
1234    and is cached after the first call to the function.
1235
1236    In locales that don't set a thousand separator (such as the "C"
1237    locale), this forces it to be ",".  We are now only showing
1238    thousand separators in one place, so this shouldn't be a problem in
1239    practice.  */
1240
1241 static void
1242 get_grouping_data (const char **sep, const char **grouping)
1243 {
1244   static const char *cached_sep;
1245   static const char *cached_grouping;
1246   static bool initialized;
1247   if (!initialized)
1248     {
1249       /* Get the grouping info from the locale. */
1250       struct lconv *lconv = localeconv ();
1251       cached_sep = lconv->thousands_sep;
1252       cached_grouping = lconv->grouping;
1253 #if ! USE_NLS_PROGRESS_BAR
1254       /* We can't count column widths, so ensure that the separator
1255        * is single-byte only (let check below determine what byte). */
1256       if (strlen(cached_sep) > 1)
1257         cached_sep = "";
1258 #endif
1259       if (!*cached_sep)
1260         {
1261           /* Many locales (such as "C" or "hr_HR") don't specify
1262              grouping, which we still want to use it for legibility.
1263              In those locales set the sep char to ',', unless that
1264              character is used for decimal point, in which case set it
1265              to ".".  */
1266           if (*lconv->decimal_point != ',')
1267             cached_sep = ",";
1268           else
1269             cached_sep = ".";
1270           cached_grouping = "\x03";
1271         }
1272       initialized = true;
1273     }
1274   *sep = cached_sep;
1275   *grouping = cached_grouping;
1276 }
1277
1278 /* Return a printed representation of N with thousand separators.
1279    This should respect locale settings, with the exception of the "C"
1280    locale which mandates no separator, but we use one anyway.
1281
1282    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1283    the separators because it's too non-portable, and it's hard to test
1284    for this feature at configure time.  Besides, it wouldn't display
1285    separators in the "C" locale, still used by many Unix users.  */
1286
1287 const char *
1288 with_thousand_seps (wgint n)
1289 {
1290   static char outbuf[48];
1291   char *p = outbuf + sizeof outbuf;
1292
1293   /* Info received from locale */
1294   const char *grouping, *sep;
1295   int seplen;
1296
1297   /* State information */
1298   int i = 0, groupsize;
1299   const char *atgroup;
1300
1301   bool negative = n < 0;
1302
1303   /* Initialize grouping data. */
1304   get_grouping_data (&sep, &grouping);
1305   seplen = strlen (sep);
1306   atgroup = grouping;
1307   groupsize = *atgroup++;
1308
1309   /* This would overflow on WGINT_MIN, but printing negative numbers
1310      is not an important goal of this fuinction.  */
1311   if (negative)
1312     n = -n;
1313
1314   /* Write the number into the buffer, backwards, inserting the
1315      separators as necessary.  */
1316   *--p = '\0';
1317   while (1)
1318     {
1319       *--p = n % 10 + '0';
1320       n /= 10;
1321       if (n == 0)
1322         break;
1323       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1324       if (++i == groupsize)
1325         {
1326           if (seplen == 1)
1327             *--p = *sep;
1328           else
1329             memcpy (p -= seplen, sep, seplen);
1330           i = 0;
1331           if (*atgroup)
1332             groupsize = *atgroup++;
1333         }
1334     }
1335   if (negative)
1336     *--p = '-';
1337
1338   return p;
1339 }
1340
1341 /* N, a byte quantity, is converted to a human-readable abberviated
1342    form a la sizes printed by `ls -lh'.  The result is written to a
1343    static buffer, a pointer to which is returned.
1344
1345    Unlike `with_thousand_seps', this approximates to the nearest unit.
1346    Quoting GNU libit: "Most people visually process strings of 3-4
1347    digits effectively, but longer strings of digits are more prone to
1348    misinterpretation.  Hence, converting to an abbreviated form
1349    usually improves readability."
1350
1351    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1352    original computer-related meaning of "powers of 1024".  We don't
1353    use the "*bibyte" names invented in 1998, and seldom used in
1354    practice.  Wikipedia's entry on "binary prefix" discusses this in
1355    some detail.  */
1356
1357 char *
1358 human_readable (HR_NUMTYPE n)
1359 {
1360   /* These suffixes are compatible with those of GNU `ls -lh'. */
1361   static char powers[] =
1362     {
1363       'K',                      /* kilobyte, 2^10 bytes */
1364       'M',                      /* megabyte, 2^20 bytes */
1365       'G',                      /* gigabyte, 2^30 bytes */
1366       'T',                      /* terabyte, 2^40 bytes */
1367       'P',                      /* petabyte, 2^50 bytes */
1368       'E',                      /* exabyte,  2^60 bytes */
1369     };
1370   static char buf[8];
1371   int i;
1372
1373   /* If the quantity is smaller than 1K, just print it. */
1374   if (n < 1024)
1375     {
1376       snprintf (buf, sizeof (buf), "%d", (int) n);
1377       return buf;
1378     }
1379
1380   /* Loop over powers, dividing N with 1024 in each iteration.  This
1381      works unchanged for all sizes of wgint, while still avoiding
1382      non-portable `long double' arithmetic.  */
1383   for (i = 0; i < countof (powers); i++)
1384     {
1385       /* At each iteration N is greater than the *subsequent* power.
1386          That way N/1024.0 produces a decimal number in the units of
1387          *this* power.  */
1388       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1389         {
1390           double val = n / 1024.0;
1391           /* Print values smaller than 10 with one decimal digits, and
1392              others without any decimals.  */
1393           snprintf (buf, sizeof (buf), "%.*f%c",
1394                     val < 10 ? 1 : 0, val, powers[i]);
1395           return buf;
1396         }
1397       n /= 1024;
1398     }
1399   return NULL;                  /* unreached */
1400 }
1401
1402 /* Count the digits in the provided number.  Used to allocate space
1403    when printing numbers.  */
1404
1405 int
1406 numdigit (wgint number)
1407 {
1408   int cnt = 1;
1409   if (number < 0)
1410     ++cnt;                      /* accomodate '-' */
1411   while ((number /= 10) != 0)
1412     ++cnt;
1413   return cnt;
1414 }
1415
1416 #define PR(mask) *p++ = n / (mask) + '0'
1417
1418 /* DIGITS_<D> is used to print a D-digit number and should be called
1419    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1420    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1421    Recursively this continues until DIGITS_1 is invoked.  */
1422
1423 #define DIGITS_1(mask) PR (mask)
1424 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1425 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1426 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1427 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1428 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1429 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1430 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1431 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1432 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1433
1434 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1435
1436 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1437 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1438 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1439 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1440 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1441 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1442 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1443 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1444 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1445
1446 /* Shorthand for casting to wgint. */
1447 #define W wgint
1448
1449 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1450    `sprintf(buffer, "%lld", (long long) number)', only typically much
1451    faster and portable to machines without long long.
1452
1453    The speedup may make a difference in programs that frequently
1454    convert numbers to strings.  Some implementations of sprintf,
1455    particularly the one in some versions of GNU libc, have been known
1456    to be quite slow when converting integers to strings.
1457
1458    Return the pointer to the location where the terminating zero was
1459    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1460    function is done.)
1461
1462    BUFFER should be large enough to accept as many bytes as you expect
1463    the number to take up.  On machines with 64-bit wgints the maximum
1464    needed size is 24 bytes.  That includes the digits needed for the
1465    largest 64-bit number, the `-' sign in case it's negative, and the
1466    terminating '\0'.  */
1467
1468 char *
1469 number_to_string (char *buffer, wgint number)
1470 {
1471   char *p = buffer;
1472   wgint n = number;
1473
1474   int last_digit_char = 0;
1475
1476 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1477   /* We are running in a very strange environment.  Leave the correct
1478      printing to sprintf.  */
1479   p += sprintf (buf, "%j", (intmax_t) (n));
1480 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1481
1482   if (n < 0)
1483     {
1484       if (n < -WGINT_MAX)
1485         {
1486           /* n = -n would overflow because -n would evaluate to a
1487              wgint value larger than WGINT_MAX.  Need to make n
1488              smaller and handle the last digit separately.  */
1489           int last_digit = n % 10;
1490           /* The sign of n%10 is implementation-defined. */
1491           if (last_digit < 0)
1492             last_digit_char = '0' - last_digit;
1493           else
1494             last_digit_char = '0' + last_digit;
1495           /* After n is made smaller, -n will not overflow. */
1496           n /= 10;
1497         }
1498
1499       *p++ = '-';
1500       n = -n;
1501     }
1502
1503   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1504      way printing any N is fully open-coded without a loop or jump.
1505      (Also see description of DIGITS_*.)  */
1506
1507   if      (n < 10)                       DIGITS_1 (1);
1508   else if (n < 100)                      DIGITS_2 (10);
1509   else if (n < 1000)                     DIGITS_3 (100);
1510   else if (n < 10000)                    DIGITS_4 (1000);
1511   else if (n < 100000)                   DIGITS_5 (10000);
1512   else if (n < 1000000)                  DIGITS_6 (100000);
1513   else if (n < 10000000)                 DIGITS_7 (1000000);
1514   else if (n < 100000000)                DIGITS_8 (10000000);
1515   else if (n < 1000000000)               DIGITS_9 (100000000);
1516 #if SIZEOF_WGINT == 4
1517   /* wgint is 32 bits wide: no number has more than 10 digits. */
1518   else                                   DIGITS_10 (1000000000);
1519 #else
1520   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1521      Constants are constructed by compile-time multiplication to avoid
1522      dealing with different notations for 64-bit constants
1523      (nL/nLL/nI64, depending on the compiler and architecture).  */
1524   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1525   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1526   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1527   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1528   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1529   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1530   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1531   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1532   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1533   else                                   DIGITS_19 (1000000000*(W)1000000000);
1534 #endif
1535
1536   if (last_digit_char)
1537     *p++ = last_digit_char;
1538
1539   *p = '\0';
1540 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1541
1542   return p;
1543 }
1544
1545 #undef PR
1546 #undef W
1547 #undef SPRINTF_WGINT
1548 #undef DIGITS_1
1549 #undef DIGITS_2
1550 #undef DIGITS_3
1551 #undef DIGITS_4
1552 #undef DIGITS_5
1553 #undef DIGITS_6
1554 #undef DIGITS_7
1555 #undef DIGITS_8
1556 #undef DIGITS_9
1557 #undef DIGITS_10
1558 #undef DIGITS_11
1559 #undef DIGITS_12
1560 #undef DIGITS_13
1561 #undef DIGITS_14
1562 #undef DIGITS_15
1563 #undef DIGITS_16
1564 #undef DIGITS_17
1565 #undef DIGITS_18
1566 #undef DIGITS_19
1567
1568 #define RING_SIZE 3
1569
1570 /* Print NUMBER to a statically allocated string and return a pointer
1571    to the printed representation.
1572
1573    This function is intended to be used in conjunction with printf.
1574    It is hard to portably print wgint values:
1575     a) you cannot use printf("%ld", number) because wgint can be long
1576        long on 32-bit machines with LFS.
1577     b) you cannot use printf("%lld", number) because NUMBER could be
1578        long on 32-bit machines without LFS, or on 64-bit machines,
1579        which do not require LFS.  Also, Windows doesn't support %lld.
1580     c) you cannot use printf("%j", (int_max_t) number) because not all
1581        versions of printf support "%j", the most notable being the one
1582        on Windows.
1583     d) you cannot #define WGINT_FMT to the appropriate format and use
1584        printf(WGINT_FMT, number) because that would break translations
1585        for user-visible messages, such as printf("Downloaded: %d
1586        bytes\n", number).
1587
1588    What you should use instead is printf("%s", number_to_static_string
1589    (number)).
1590
1591    CAVEAT: since the function returns pointers to static data, you
1592    must be careful to copy its result before calling it again.
1593    However, to make it more useful with printf, the function maintains
1594    an internal ring of static buffers to return.  That way things like
1595    printf("%s %s", number_to_static_string (num1),
1596    number_to_static_string (num2)) work as expected.  Three buffers
1597    are currently used, which means that "%s %s %s" will work, but "%s
1598    %s %s %s" won't.  If you need to print more than three wgints,
1599    bump the RING_SIZE (or rethink your message.)  */
1600
1601 char *
1602 number_to_static_string (wgint number)
1603 {
1604   static char ring[RING_SIZE][24];
1605   static int ringpos;
1606   char *buf = ring[ringpos];
1607   number_to_string (buf, number);
1608   ringpos = (ringpos + 1) % RING_SIZE;
1609   return buf;
1610 }
1611 \f
1612 /* Determine the width of the terminal we're running on.  If that's
1613    not possible, return 0.  */
1614
1615 int
1616 determine_screen_width (void)
1617 {
1618   /* If there's a way to get the terminal size using POSIX
1619      tcgetattr(), somebody please tell me.  */
1620 #ifdef TIOCGWINSZ
1621   int fd;
1622   struct winsize wsz;
1623
1624   if (opt.lfilename != NULL)
1625     return 0;
1626
1627   fd = fileno (stderr);
1628   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1629     return 0;                   /* most likely ENOTTY */
1630
1631   return wsz.ws_col;
1632 #elif defined(WINDOWS)
1633   CONSOLE_SCREEN_BUFFER_INFO csbi;
1634   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1635     return 0;
1636   return csbi.dwSize.X;
1637 #else  /* neither TIOCGWINSZ nor WINDOWS */
1638   return 0;
1639 #endif /* neither TIOCGWINSZ nor WINDOWS */
1640 }
1641 \f
1642 /* Whether the rnd system (either rand or [dl]rand48) has been
1643    seeded.  */
1644 static int rnd_seeded;
1645
1646 /* Return a random number between 0 and MAX-1, inclusive.
1647
1648    If the system does not support lrand48 and MAX is greater than the
1649    value of RAND_MAX+1 on the system, the returned value will be in
1650    the range [0, RAND_MAX].  This may be fixed in a future release.
1651    The random number generator is seeded automatically the first time
1652    it is called.
1653
1654    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1655    for cryptography.  It is only meant to be used in situations where
1656    quality of the random numbers returned doesn't really matter.  */
1657
1658 int
1659 random_number (int max)
1660 {
1661 #ifdef HAVE_DRAND48
1662   if (!rnd_seeded)
1663     {
1664       srand48 ((long) time (NULL) ^ (long) getpid ());
1665       rnd_seeded = 1;
1666     }
1667   return lrand48 () % max;
1668 #else  /* not HAVE_DRAND48 */
1669
1670   double bounded;
1671   int rnd;
1672   if (!rnd_seeded)
1673     {
1674       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1675       rnd_seeded = 1;
1676     }
1677   rnd = rand ();
1678
1679   /* Like rand() % max, but uses the high-order bits for better
1680      randomness on architectures where rand() is implemented using a
1681      simple congruential generator.  */
1682
1683   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1684   return (int) bounded;
1685
1686 #endif /* not HAVE_DRAND48 */
1687 }
1688
1689 /* Return a random uniformly distributed floating point number in the
1690    [0, 1) range.  Uses drand48 where available, and a really lame
1691    kludge elsewhere.  */
1692
1693 double
1694 random_float (void)
1695 {
1696 #ifdef HAVE_DRAND48
1697   if (!rnd_seeded)
1698     {
1699       srand48 ((long) time (NULL) ^ (long) getpid ());
1700       rnd_seeded = 1;
1701     }
1702   return drand48 ();
1703 #else  /* not HAVE_DRAND48 */
1704   return (  random_number (10000) / 10000.0
1705           + random_number (10000) / (10000.0 * 10000.0)
1706           + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1707           + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1708 #endif /* not HAVE_DRAND48 */
1709 }
1710 \f
1711 /* Implementation of run_with_timeout, a generic timeout-forcing
1712    routine for systems with Unix-like signal handling.  */
1713
1714 #ifdef USE_SIGNAL_TIMEOUT
1715 # ifdef HAVE_SIGSETJMP
1716 #  define SETJMP(env) sigsetjmp (env, 1)
1717
1718 static sigjmp_buf run_with_timeout_env;
1719
1720 static void
1721 abort_run_with_timeout (int sig)
1722 {
1723   assert (sig == SIGALRM);
1724   siglongjmp (run_with_timeout_env, -1);
1725 }
1726 # else /* not HAVE_SIGSETJMP */
1727 #  define SETJMP(env) setjmp (env)
1728
1729 static jmp_buf run_with_timeout_env;
1730
1731 static void
1732 abort_run_with_timeout (int sig)
1733 {
1734   assert (sig == SIGALRM);
1735   /* We don't have siglongjmp to preserve the set of blocked signals;
1736      if we longjumped out of the handler at this point, SIGALRM would
1737      remain blocked.  We must unblock it manually. */
1738   int mask = siggetmask ();
1739   mask &= ~sigmask (SIGALRM);
1740   sigsetmask (mask);
1741
1742   /* Now it's safe to longjump. */
1743   longjmp (run_with_timeout_env, -1);
1744 }
1745 # endif /* not HAVE_SIGSETJMP */
1746
1747 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1748    setitimer where available, alarm otherwise.
1749
1750    TIMEOUT should be non-zero.  If the timeout value is so small that
1751    it would be rounded to zero, it is rounded to the least legal value
1752    instead (1us for setitimer, 1s for alarm).  That ensures that
1753    SIGALRM will be delivered in all cases.  */
1754
1755 static void
1756 alarm_set (double timeout)
1757 {
1758 #ifdef ITIMER_REAL
1759   /* Use the modern itimer interface. */
1760   struct itimerval itv;
1761   xzero (itv);
1762   itv.it_value.tv_sec = (long) timeout;
1763   itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1764   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1765     /* Ensure that we wait for at least the minimum interval.
1766        Specifying zero would mean "wait forever".  */
1767     itv.it_value.tv_usec = 1;
1768   setitimer (ITIMER_REAL, &itv, NULL);
1769 #else  /* not ITIMER_REAL */
1770   /* Use the old alarm() interface. */
1771   int secs = (int) timeout;
1772   if (secs == 0)
1773     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1774        because alarm(0) means "never deliver the alarm", i.e. "wait
1775        forever", which is not what someone who specifies a 0.5s
1776        timeout would expect.  */
1777     secs = 1;
1778   alarm (secs);
1779 #endif /* not ITIMER_REAL */
1780 }
1781
1782 /* Cancel the alarm set with alarm_set. */
1783
1784 static void
1785 alarm_cancel (void)
1786 {
1787 #ifdef ITIMER_REAL
1788   struct itimerval disable;
1789   xzero (disable);
1790   setitimer (ITIMER_REAL, &disable, NULL);
1791 #else  /* not ITIMER_REAL */
1792   alarm (0);
1793 #endif /* not ITIMER_REAL */
1794 }
1795
1796 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1797    seconds.  Returns true if the function was interrupted with a
1798    timeout, false otherwise.
1799
1800    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1801    using setitimer() or alarm().  The timeout is enforced by
1802    longjumping out of the SIGALRM handler.  This has several
1803    advantages compared to the traditional approach of relying on
1804    signals causing system calls to exit with EINTR:
1805
1806      * The callback function is *forcibly* interrupted after the
1807        timeout expires, (almost) regardless of what it was doing and
1808        whether it was in a syscall.  For example, a calculation that
1809        takes a long time is interrupted as reliably as an IO
1810        operation.
1811
1812      * It works with both SYSV and BSD signals because it doesn't
1813        depend on the default setting of SA_RESTART.
1814
1815      * It doesn't require special handler setup beyond a simple call
1816        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1817        optional.)
1818
1819    The only downside is that, if FUN allocates internal resources that
1820    are normally freed prior to exit from the functions, they will be
1821    lost in case of timeout.  */
1822
1823 bool
1824 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1825 {
1826   int saved_errno;
1827
1828   if (timeout == 0)
1829     {
1830       fun (arg);
1831       return false;
1832     }
1833
1834   signal (SIGALRM, abort_run_with_timeout);
1835   if (SETJMP (run_with_timeout_env) != 0)
1836     {
1837       /* Longjumped out of FUN with a timeout. */
1838       signal (SIGALRM, SIG_DFL);
1839       return true;
1840     }
1841   alarm_set (timeout);
1842   fun (arg);
1843
1844   /* Preserve errno in case alarm() or signal() modifies it. */
1845   saved_errno = errno;
1846   alarm_cancel ();
1847   signal (SIGALRM, SIG_DFL);
1848   errno = saved_errno;
1849
1850   return false;
1851 }
1852
1853 #else  /* not USE_SIGNAL_TIMEOUT */
1854
1855 #ifndef WINDOWS
1856 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
1857    define it under Windows, because Windows has its own version of
1858    run_with_timeout that uses threads.  */
1859
1860 bool
1861 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1862 {
1863   fun (arg);
1864   return false;
1865 }
1866 #endif /* not WINDOWS */
1867 #endif /* not USE_SIGNAL_TIMEOUT */
1868 \f
1869 #ifndef WINDOWS
1870
1871 /* Sleep the specified amount of seconds.  On machines without
1872    nanosleep(), this may sleep shorter if interrupted by signals.  */
1873
1874 void
1875 xsleep (double seconds)
1876 {
1877 #ifdef HAVE_NANOSLEEP
1878   /* nanosleep is the preferred interface because it offers high
1879      accuracy and, more importantly, because it allows us to reliably
1880      restart receiving a signal such as SIGWINCH.  (There was an
1881      actual Debian bug report about --limit-rate malfunctioning while
1882      the terminal was being resized.)  */
1883   struct timespec sleep, remaining;
1884   sleep.tv_sec = (long) seconds;
1885   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1886   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1887     /* If nanosleep has been interrupted by a signal, adjust the
1888        sleeping period and return to sleep.  */
1889     sleep = remaining;
1890 #elif defined(HAVE_USLEEP)
1891   /* If usleep is available, use it in preference to select.  */
1892   if (seconds >= 1)
1893     {
1894       /* On some systems, usleep cannot handle values larger than
1895          1,000,000.  If the period is larger than that, use sleep
1896          first, then add usleep for subsecond accuracy.  */
1897       sleep (seconds);
1898       seconds -= (long) seconds;
1899     }
1900   usleep (seconds * 1000000);
1901 #else /* fall back select */
1902   /* Note that, although Windows supports select, it can't be used to
1903      implement sleeping because Winsock's select doesn't implement
1904      timeout when it is passed NULL pointers for all fd sets.  (But it
1905      does under Cygwin, which implements Unix-compatible select.)  */
1906   struct timeval sleep;
1907   sleep.tv_sec = (long) seconds;
1908   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
1909   select (0, NULL, NULL, NULL, &sleep);
1910   /* If select returns -1 and errno is EINTR, it means we were
1911      interrupted by a signal.  But without knowing how long we've
1912      actually slept, we can't return to sleep.  Using gettimeofday to
1913      track sleeps is slow and unreliable due to clock skew.  */
1914 #endif
1915 }
1916
1917 #endif /* not WINDOWS */
1918
1919 /* Encode the octets in DATA of length LENGTH to base64 format,
1920    storing the result to DEST.  The output will be zero-terminated,
1921    and must point to a writable buffer of at least
1922    1+BASE64_LENGTH(length) bytes.  The function returns the length of
1923    the resulting base64 data, not counting the terminating zero.
1924
1925    This implementation does not emit newlines after 76 characters of
1926    base64 data.  */
1927
1928 int
1929 base64_encode (const void *data, int length, char *dest)
1930 {
1931   /* Conversion table.  */
1932   static const char tbl[64] = {
1933     'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
1934     'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
1935     'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
1936     'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
1937   };
1938   /* Access bytes in DATA as unsigned char, otherwise the shifts below
1939      don't work for data with MSB set. */
1940   const unsigned char *s = data;
1941   /* Theoretical ANSI violation when length < 3. */
1942   const unsigned char *end = (const unsigned char *) data + length - 2;
1943   char *p = dest;
1944
1945   /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
1946   for (; s < end; s += 3)
1947     {
1948       *p++ = tbl[s[0] >> 2];
1949       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1950       *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
1951       *p++ = tbl[s[2] & 0x3f];
1952     }
1953
1954   /* Pad the result if necessary...  */
1955   switch (length % 3)
1956     {
1957     case 1:
1958       *p++ = tbl[s[0] >> 2];
1959       *p++ = tbl[(s[0] & 3) << 4];
1960       *p++ = '=';
1961       *p++ = '=';
1962       break;
1963     case 2:
1964       *p++ = tbl[s[0] >> 2];
1965       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1966       *p++ = tbl[((s[1] & 0xf) << 2)];
1967       *p++ = '=';
1968       break;
1969     }
1970   /* ...and zero-terminate it.  */
1971   *p = '\0';
1972
1973   return p - dest;
1974 }
1975
1976 /* Store in C the next non-whitespace character from the string, or \0
1977    when end of string is reached.  */
1978 #define NEXT_CHAR(c, p) do {                    \
1979   c = (unsigned char) *p++;                     \
1980 } while (c_isspace (c))
1981
1982 #define IS_ASCII(c) (((c) & 0x80) == 0)
1983
1984 /* Decode data from BASE64 (a null-terminated string) into memory
1985    pointed to by DEST.  DEST is assumed to be large enough to
1986    accomodate the decoded data, which is guaranteed to be no more than
1987    3/4*strlen(base64).
1988
1989    Since DEST is assumed to contain binary data, it is not
1990    NUL-terminated.  The function returns the length of the data
1991    written to TO.  -1 is returned in case of error caused by malformed
1992    base64 input.
1993
1994    This function originates from Free Recode.  */
1995
1996 int
1997 base64_decode (const char *base64, void *dest)
1998 {
1999   /* Table of base64 values for first 128 characters.  Note that this
2000      assumes ASCII (but so does Wget in other places).  */
2001   static const signed char base64_char_to_value[128] =
2002     {
2003       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
2004       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
2005       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
2006       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
2007       -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
2008       54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
2009       -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
2010       5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
2011       15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
2012       25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
2013       29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
2014       39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
2015       49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
2016     };
2017 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
2018 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
2019
2020   const char *p = base64;
2021   char *q = dest;
2022
2023   while (1)
2024     {
2025       unsigned char c;
2026       unsigned long value;
2027
2028       /* Process first byte of a quadruplet.  */
2029       NEXT_CHAR (c, p);
2030       if (!c)
2031         break;
2032       if (c == '=' || !IS_BASE64 (c))
2033         return -1;              /* illegal char while decoding base64 */
2034       value = BASE64_CHAR_TO_VALUE (c) << 18;
2035
2036       /* Process second byte of a quadruplet.  */
2037       NEXT_CHAR (c, p);
2038       if (!c)
2039         return -1;              /* premature EOF while decoding base64 */
2040       if (c == '=' || !IS_BASE64 (c))
2041         return -1;              /* illegal char while decoding base64 */
2042       value |= BASE64_CHAR_TO_VALUE (c) << 12;
2043       *q++ = value >> 16;
2044
2045       /* Process third byte of a quadruplet.  */
2046       NEXT_CHAR (c, p);
2047       if (!c)
2048         return -1;              /* premature EOF while decoding base64 */
2049       if (!IS_BASE64 (c))
2050         return -1;              /* illegal char while decoding base64 */
2051
2052       if (c == '=')
2053         {
2054           NEXT_CHAR (c, p);
2055           if (!c)
2056             return -1;          /* premature EOF while decoding base64 */
2057           if (c != '=')
2058             return -1;          /* padding `=' expected but not found */
2059           continue;
2060         }
2061
2062       value |= BASE64_CHAR_TO_VALUE (c) << 6;
2063       *q++ = 0xff & value >> 8;
2064
2065       /* Process fourth byte of a quadruplet.  */
2066       NEXT_CHAR (c, p);
2067       if (!c)
2068         return -1;              /* premature EOF while decoding base64 */
2069       if (c == '=')
2070         continue;
2071       if (!IS_BASE64 (c))
2072         return -1;              /* illegal char while decoding base64 */
2073
2074       value |= BASE64_CHAR_TO_VALUE (c);
2075       *q++ = 0xff & value;
2076     }
2077 #undef IS_BASE64
2078 #undef BASE64_CHAR_TO_VALUE
2079
2080   return q - (char *) dest;
2081 }
2082
2083 #undef IS_ASCII
2084 #undef NEXT_CHAR
2085 \f
2086 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2087    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2088
2089 static void
2090 mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2091                     int (*cmpfun) (const void *, const void *))
2092 {
2093 #define ELT(array, pos) ((char *)(array) + (pos) * size)
2094   if (from < to)
2095     {
2096       size_t i, j, k;
2097       size_t mid = (to + from) / 2;
2098       mergesort_internal (base, temp, size, from, mid, cmpfun);
2099       mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2100       i = from;
2101       j = mid + 1;
2102       for (k = from; (i <= mid) && (j <= to); k++)
2103         if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2104           memcpy (ELT (temp, k), ELT (base, i++), size);
2105         else
2106           memcpy (ELT (temp, k), ELT (base, j++), size);
2107       while (i <= mid)
2108         memcpy (ELT (temp, k++), ELT (base, i++), size);
2109       while (j <= to)
2110         memcpy (ELT (temp, k++), ELT (base, j++), size);
2111       for (k = from; k <= to; k++)
2112         memcpy (ELT (base, k), ELT (temp, k), size);
2113     }
2114 #undef ELT
2115 }
2116
2117 /* Stable sort with interface exactly like standard library's qsort.
2118    Uses mergesort internally, allocating temporary storage with
2119    alloca.  */
2120
2121 void
2122 stable_sort (void *base, size_t nmemb, size_t size,
2123              int (*cmpfun) (const void *, const void *))
2124 {
2125   if (size > 1)
2126     {
2127       void *temp = alloca (nmemb * size * sizeof (void *));
2128       mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2129     }
2130 }
2131 \f
2132 /* Print a decimal number.  If it is equal to or larger than ten, the
2133    number is rounded.  Otherwise it is printed with one significant
2134    digit without trailing zeros and with no more than three fractional
2135    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2136    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2137
2138    This is useful for displaying durations because it provides
2139    order-of-magnitude information without unnecessary clutter --
2140    long-running downloads are shown without the fractional part, and
2141    short ones still retain one significant digit.  */
2142
2143 const char *
2144 print_decimal (double number)
2145 {
2146   static char buf[32];
2147   double n = number >= 0 ? number : -number;
2148
2149   if (n >= 9.95)
2150     /* Cut off at 9.95 because the below %.1f would round 9.96 to
2151        "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2152     snprintf (buf, sizeof buf, "%.0f", number);
2153   else if (n >= 0.95)
2154     snprintf (buf, sizeof buf, "%.1f", number);
2155   else if (n >= 0.001)
2156     snprintf (buf, sizeof buf, "%.1g", number);
2157   else if (n >= 0.0005)
2158     /* round [0.0005, 0.001) to 0.001 */
2159     snprintf (buf, sizeof buf, "%.3f", number);
2160   else
2161     /* print numbers close to 0 as 0, not 0.000 */
2162     strcpy (buf, "0");
2163
2164   return buf;
2165 }
2166
2167 #ifdef TESTING
2168
2169 const char *
2170 test_subdir_p()
2171 {
2172   int i;
2173   struct {
2174     char *d1;
2175     char *d2;
2176     bool result;
2177   } test_array[] = {
2178     { "/somedir", "/somedir", true },
2179     { "/somedir", "/somedir/d2", true },
2180     { "/somedir/d1", "/somedir", false },
2181   };
2182
2183   for (i = 0; i < countof(test_array); ++i)
2184     {
2185       bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2186
2187       mu_assert ("test_subdir_p: wrong result",
2188                  res == test_array[i].result);
2189     }
2190
2191   return NULL;
2192 }
2193
2194 const char *
2195 test_dir_matches_p()
2196 {
2197   int i;
2198   struct {
2199     char *dirlist[3];
2200     char *dir;
2201     bool result;
2202   } test_array[] = {
2203     { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2204     { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2205     { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2206     { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2207     { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
2208     { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2209     { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
2210     { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
2211     { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
2212     { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
2213     { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
2214     { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
2215     { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
2216   };
2217
2218   for (i = 0; i < countof(test_array); ++i)
2219     {
2220       bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2221
2222       mu_assert ("test_dir_matches_p: wrong result",
2223                  res == test_array[i].result);
2224     }
2225
2226   return NULL;
2227 }
2228
2229 #endif /* TESTING */
2230