sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996-2005 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or
   9 (at your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software Foundation, Inc.,
  18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <time.h>
  36 #ifdef HAVE_SYS_TIME_H
  37 # include <sys/time.h>
  38 #endif
  39 #ifdef HAVE_UNISTD_H
  40 # include <unistd.h>
  41 #endif
  42 #ifdef HAVE_MMAP
  43 # include <sys/mman.h>
  44 #endif
  45 #ifdef HAVE_UTIME_H
  46 # include <utime.h>
  47 #endif
  48 #ifdef HAVE_SYS_UTIME_H
  49 # include <sys/utime.h>
  50 #endif
  51 #include <errno.h>
  52 #include <fcntl.h>
  53 #include <assert.h>
  54 #include <stdarg.h>
  55 #include <locale.h>
  56
  57 /* For TIOCGWINSZ and friends: */
  58 #ifdef HAVE_SYS_IOCTL_H
  59 # include <sys/ioctl.h>
  60 #endif
  61 #ifdef HAVE_TERMIOS_H
  62 # include <termios.h>
  63 #endif
  64
  65 /* Needed for Unix version of run_with_timeout. */
  66 #include <signal.h>
  67 #include <setjmp.h>
  68
  69 #ifndef HAVE_SIGSETJMP
  70 /* If sigsetjmp is a macro, configure won't pick it up. */
  71 # ifdef sigsetjmp
  72 #  define HAVE_SIGSETJMP
  73 # endif
  74 #endif
  75
  76 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  77 # define USE_SIGNAL_TIMEOUT
  78 #endif
  79
  80 #include "wget.h"
  81 #include "utils.h"
  82 #include "hash.h"
  83
  84 /* Utility function: like xstrdup(), but also lowercases S.  */
  85
  86 char *
  87 xstrdup_lower (const char *s)
  88 {
  89   char *copy = xstrdup (s);
  90   char *p = copy;
  91   for (; *p; p++)
  92     *p = TOLOWER (*p);
  93   return copy;
  94 }
  95
  96 /* Copy the string formed by two pointers (one on the beginning, other
  97    on the char after the last char) to a new, malloc-ed location.
  98    0-terminate it.  */
  99 char *
 100 strdupdelim (const char *beg, const char *end)
 101 {
 102   char *res = xmalloc (end - beg + 1);
 103   memcpy (res, beg, end - beg);
 104   res[end - beg] = '\0';
 105   return res;
 106 }
 107
 108 /* Parse a string containing comma-separated elements, and return a
 109    vector of char pointers with the elements.  Spaces following the
 110    commas are ignored.  */
 111 char **
 112 sepstring (const char *s)
 113 {
 114   char **res;
 115   const char *p;
 116   int i = 0;
 117
 118   if (!s || !*s)
 119     return NULL;
 120   res = NULL;
 121   p = s;
 122   while (*s)
 123     {
 124       if (*s == ',')
 125         {
 126           res = xrealloc (res, (i + 2) * sizeof (char *));
 127           res[i] = strdupdelim (p, s);
 128           res[++i] = NULL;
 129           ++s;
 130           /* Skip the blanks following the ','.  */
 131           while (ISSPACE (*s))
 132             ++s;
 133           p = s;
 134         }
 135       else
 136         ++s;
 137     }
 138   res = xrealloc (res, (i + 2) * sizeof (char *));
 139   res[i] = strdupdelim (p, s);
 140   res[i + 1] = NULL;
 141   return res;
 142 }
 143 \f
 144 /* Like sprintf, but prints into a string of sufficient size freshly
 145    allocated with malloc, which is returned.  If unable to print due
 146    to invalid format, returns NULL.  Inability to allocate needed
 147    memory results in abort, as with xmalloc.  This is in spirit
 148    similar to the GNU/BSD extension asprintf, but somewhat easier to
 149    use.
 150
 151    Internally the function either calls vasprintf or loops around
 152    vsnprintf until the correct size is found.  Since Wget also ships a
 153    fallback implementation of vsnprintf, this should be portable.  */
 154
 155 char *
 156 aprintf (const char *fmt, ...)
 157 {
 158 #ifdef HAVE_VASPRINTF
 159   /* Use vasprintf. */
 160   int ret;
 161   va_list args;
 162   char *str;
 163   va_start (args, fmt);
 164   ret = vasprintf (&str, fmt, args);
 165   va_end (args);
 166   if (ret < 0 && errno == ENOMEM)
 167     abort ();                   /* for consistency with xmalloc/xrealloc */
 168   else if (ret < 0)
 169     return NULL;
 170   return str;
 171 #else  /* not HAVE_VASPRINTF */
 172
 173   /* vasprintf is unavailable.  snprintf into a small buffer and
 174      resize it as necessary. */
 175   int size = 32;
 176   char *str = xmalloc (size);
 177
 178   while (1)
 179     {
 180       int n;
 181       va_list args;
 182
 183       va_start (args, fmt);
 184       n = vsnprintf (str, size, fmt, args);
 185       va_end (args);
 186
 187       /* If the printing worked, return the string. */
 188       if (n > -1 && n < size)
 189         return str;
 190
 191       /* Else try again with a larger buffer. */
 192       if (n > -1)               /* C99 */
 193         size = n + 1;           /* precisely what is needed */
 194       else
 195         size <<= 1;             /* twice the old size */
 196       str = xrealloc (str, size);
 197     }
 198 #endif /* not HAVE_VASPRINTF */
 199 }
 200
 201 /* Concatenate the NULL-terminated list of string arguments into
 202    freshly allocated space.  */
 203
 204 char *
 205 concat_strings (const char *str0, ...)
 206 {
 207   va_list args;
 208   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 209   char *ret, *p;
 210
 211   const char *next_str;
 212   int total_length = 0;
 213   int argcount;
 214
 215   /* Calculate the length of and allocate the resulting string. */
 216
 217   argcount = 0;
 218   va_start (args, str0);
 219   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 220     {
 221       int len = strlen (next_str);
 222       if (argcount < countof (saved_lengths))
 223         saved_lengths[argcount++] = len;
 224       total_length += len;
 225     }
 226   va_end (args);
 227   p = ret = xmalloc (total_length + 1);
 228
 229   /* Copy the strings into the allocated space. */
 230
 231   argcount = 0;
 232   va_start (args, str0);
 233   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 234     {
 235       int len;
 236       if (argcount < countof (saved_lengths))
 237         len = saved_lengths[argcount++];
 238       else
 239         len = strlen (next_str);
 240       memcpy (p, next_str, len);
 241       p += len;
 242     }
 243   va_end (args);
 244   *p = '\0';
 245
 246   return ret;
 247 }
 248 \f
 249 /* Return pointer to a static char[] buffer in which zero-terminated
 250    string-representation of TM (in form hh:mm:ss) is printed.
 251
 252    If TM is NULL, the current time will be used.  */
 253
 254 char *
 255 time_str (time_t *tm)
 256 {
 257   static char output[15];
 258   struct tm *ptm;
 259   time_t secs = tm ? *tm : time (NULL);
 260
 261   if (secs == -1)
 262     {
 263       /* In case of error, return the empty string.  Maybe we should
 264          just abort if this happens?  */
 265       *output = '\0';
 266       return output;
 267     }
 268   ptm = localtime (&secs);
 269   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 270   return output;
 271 }
 272
 273 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 274
 275 char *
 276 datetime_str (time_t *tm)
 277 {
 278   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 279   struct tm *ptm;
 280   time_t secs = tm ? *tm : time (NULL);
 281
 282   if (secs == -1)
 283     {
 284       /* In case of error, return the empty string.  Maybe we should
 285          just abort if this happens?  */
 286       *output = '\0';
 287       return output;
 288     }
 289   ptm = localtime (&secs);
 290   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 291            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 292            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 293   return output;
 294 }
 295 \f
 296 /* The Windows versions of the following two functions are defined in
 297    mswindows.c.  */
 298
 299 #ifndef WINDOWS
 300 void
 301 fork_to_background (void)
 302 {
 303   pid_t pid;
 304   /* Whether we arrange our own version of opt.lfilename here.  */
 305   bool logfile_changed = false;
 306
 307   if (!opt.lfilename)
 308     {
 309       /* We must create the file immediately to avoid either a race
 310          condition (which arises from using unique_name and failing to
 311          use fopen_excl) or lying to the user about the log file name
 312          (which arises from using unique_name, printing the name, and
 313          using fopen_excl later on.)  */
 314       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 315       if (new_log_fp)
 316         {
 317           logfile_changed = true;
 318           fclose (new_log_fp);
 319         }
 320     }
 321   pid = fork ();
 322   if (pid < 0)
 323     {
 324       /* parent, error */
 325       perror ("fork");
 326       exit (1);
 327     }
 328   else if (pid != 0)
 329     {
 330       /* parent, no error */
 331       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 332       if (logfile_changed)
 333         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 334       exit (0);                 /* #### should we use _exit()? */
 335     }
 336
 337   /* child: give up the privileges and keep running. */
 338   setsid ();
 339   freopen ("/dev/null", "r", stdin);
 340   freopen ("/dev/null", "w", stdout);
 341   freopen ("/dev/null", "w", stderr);
 342 }
 343 #endif /* not WINDOWS */
 344 \f
 345 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 346    specified with TM.  The atime ("access time") is set to the current
 347    time.  */
 348
 349 void
 350 touch (const char *file, time_t tm)
 351 {
 352 #ifdef HAVE_STRUCT_UTIMBUF
 353   struct utimbuf times;
 354 #else
 355   struct {
 356     time_t actime;
 357     time_t modtime;
 358   } times;
 359 #endif
 360   times.modtime = tm;
 361   times.actime = time (NULL);
 362   if (utime (file, &times) == -1)
 363     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 364 }
 365
 366 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 367    nothing under MS-Windows.  */
 368 int
 369 remove_link (const char *file)
 370 {
 371   int err = 0;
 372   struct_stat st;
 373
 374   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 375     {
 376       DEBUGP (("Unlinking %s (symlink).\n", file));
 377       err = unlink (file);
 378       if (err != 0)
 379         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 380                    file, strerror (errno));
 381     }
 382   return err;
 383 }
 384
 385 /* Does FILENAME exist?  This is quite a lousy implementation, since
 386    it supplies no error codes -- only a yes-or-no answer.  Thus it
 387    will return that a file does not exist if, e.g., the directory is
 388    unreadable.  I don't mind it too much currently, though.  The
 389    proper way should, of course, be to have a third, error state,
 390    other than true/false, but that would introduce uncalled-for
 391    additional complexity to the callers.  */
 392 bool
 393 file_exists_p (const char *filename)
 394 {
 395 #ifdef HAVE_ACCESS
 396   return access (filename, F_OK) >= 0;
 397 #else
 398   struct_stat buf;
 399   return stat (filename, &buf) >= 0;
 400 #endif
 401 }
 402
 403 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 404    Returns 0 on error.  */
 405 bool
 406 file_non_directory_p (const char *path)
 407 {
 408   struct_stat buf;
 409   /* Use lstat() rather than stat() so that symbolic links pointing to
 410      directories can be identified correctly.  */
 411   if (lstat (path, &buf) != 0)
 412     return false;
 413   return S_ISDIR (buf.st_mode) ? false : true;
 414 }
 415
 416 /* Return the size of file named by FILENAME, or -1 if it cannot be
 417    opened or seeked into. */
 418 wgint
 419 file_size (const char *filename)
 420 {
 421 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 422   wgint size;
 423   /* We use fseek rather than stat to determine the file size because
 424      that way we can also verify that the file is readable without
 425      explicitly checking for permissions.  Inspired by the POST patch
 426      by Arnaud Wylie.  */
 427   FILE *fp = fopen (filename, "rb");
 428   if (!fp)
 429     return -1;
 430   fseeko (fp, 0, SEEK_END);
 431   size = ftello (fp);
 432   fclose (fp);
 433   return size;
 434 #else
 435   struct_stat st;
 436   if (stat (filename, &st) < 0)
 437     return -1;
 438   return st.st_size;
 439 #endif
 440 }
 441
 442 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 443    doesn't exist is found.  Return a freshly allocated copy of the
 444    unused file name.  */
 445
 446 static char *
 447 unique_name_1 (const char *prefix)
 448 {
 449   int count = 1;
 450   int plen = strlen (prefix);
 451   char *template = (char *)alloca (plen + 1 + 24);
 452   char *template_tail = template + plen;
 453
 454   memcpy (template, prefix, plen);
 455   *template_tail++ = '.';
 456
 457   do
 458     number_to_string (template_tail, count++);
 459   while (file_exists_p (template));
 460
 461   return xstrdup (template);
 462 }
 463
 464 /* Return a unique file name, based on FILE.
 465
 466    More precisely, if FILE doesn't exist, it is returned unmodified.
 467    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 468    file name that doesn't exist is returned.
 469
 470    The resulting file is not created, only verified that it didn't
 471    exist at the point in time when the function was called.
 472    Therefore, where security matters, don't rely that the file created
 473    by this function exists until you open it with O_EXCL or
 474    equivalent.
 475
 476    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 477    string.  Otherwise, it may return FILE if the file doesn't exist
 478    (and therefore doesn't need changing).  */
 479
 480 char *
 481 unique_name (const char *file, bool allow_passthrough)
 482 {
 483   /* If the FILE itself doesn't exist, return it without
 484      modification. */
 485   if (!file_exists_p (file))
 486     return allow_passthrough ? (char *)file : xstrdup (file);
 487
 488   /* Otherwise, find a numeric suffix that results in unused file name
 489      and return it.  */
 490   return unique_name_1 (file);
 491 }
 492
 493 /* Create a file based on NAME, except without overwriting an existing
 494    file with that name.  Providing O_EXCL is correctly implemented,
 495    this function does not have the race condition associated with
 496    opening the file returned by unique_name.  */
 497
 498 FILE *
 499 unique_create (const char *name, bool binary, char **opened_name)
 500 {
 501   /* unique file name, based on NAME */
 502   char *uname = unique_name (name, false);
 503   FILE *fp;
 504   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 505     {
 506       xfree (uname);
 507       uname = unique_name (name, false);
 508     }
 509   if (opened_name && fp != NULL)
 510     {
 511       if (fp)
 512         *opened_name = uname;
 513       else
 514         {
 515           *opened_name = NULL;
 516           xfree (uname);
 517         }
 518     }
 519   else
 520     xfree (uname);
 521   return fp;
 522 }
 523
 524 /* Open the file for writing, with the addition that the file is
 525    opened "exclusively".  This means that, if the file already exists,
 526    this function will *fail* and errno will be set to EEXIST.  If
 527    BINARY is set, the file will be opened in binary mode, equivalent
 528    to fopen's "wb".
 529
 530    If opening the file fails for any reason, including the file having
 531    previously existed, this function returns NULL and sets errno
 532    appropriately.  */
 533
 534 FILE *
 535 fopen_excl (const char *fname, bool binary)
 536 {
 537   int fd;
 538 #ifdef O_EXCL
 539   int flags = O_WRONLY | O_CREAT | O_EXCL;
 540 # ifdef O_BINARY
 541   if (binary)
 542     flags |= O_BINARY;
 543 # endif
 544   fd = open (fname, flags, 0666);
 545   if (fd < 0)
 546     return NULL;
 547   return fdopen (fd, binary ? "wb" : "w");
 548 #else  /* not O_EXCL */
 549   /* Manually check whether the file exists.  This is prone to race
 550      conditions, but systems without O_EXCL haven't deserved
 551      better.  */
 552   if (file_exists_p (fname))
 553     {
 554       errno = EEXIST;
 555       return NULL;
 556     }
 557   return fopen (fname, binary ? "wb" : "w");
 558 #endif /* not O_EXCL */
 559 }
 560 \f
 561 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 562    are missing, create them first.  In case any mkdir() call fails,
 563    return its error status.  Returns 0 on successful completion.
 564
 565    The behaviour of this function should be identical to the behaviour
 566    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 567 int
 568 make_directory (const char *directory)
 569 {
 570   int i, ret, quit = 0;
 571   char *dir;
 572
 573   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 574      function is unsafe if called with a read-only char *argument.  */
 575   STRDUP_ALLOCA (dir, directory);
 576
 577   /* If the first character of dir is '/', skip it (and thus enable
 578      creation of absolute-pathname directories.  */
 579   for (i = (*dir == '/'); 1; ++i)
 580     {
 581       for (; dir[i] && dir[i] != '/'; i++)
 582         ;
 583       if (!dir[i])
 584         quit = 1;
 585       dir[i] = '\0';
 586       /* Check whether the directory already exists.  Allow creation of
 587          of intermediate directories to fail, as the initial path components
 588          are not necessarily directories!  */
 589       if (!file_exists_p (dir))
 590         ret = mkdir (dir, 0777);
 591       else
 592         ret = 0;
 593       if (quit)
 594         break;
 595       else
 596         dir[i] = '/';
 597     }
 598   return ret;
 599 }
 600
 601 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 602    should be a file name.
 603
 604    file_merge("/foo/bar", "baz")  => "/foo/baz"
 605    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 606    file_merge("foo", "bar")       => "bar"
 607
 608    In other words, it's a simpler and gentler version of uri_merge.  */
 609
 610 char *
 611 file_merge (const char *base, const char *file)
 612 {
 613   char *result;
 614   const char *cut = (const char *)strrchr (base, '/');
 615
 616   if (!cut)
 617     return xstrdup (file);
 618
 619   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 620   memcpy (result, base, cut - base);
 621   result[cut - base] = '/';
 622   strcpy (result + (cut - base) + 1, file);
 623
 624   return result;
 625 }
 626 \f
 627 /* Like fnmatch, but performs a case-insensitive match.  */
 628
 629 int
 630 fnmatch_nocase (const char *pattern, const char *string, int flags)
 631 {
 632 #ifdef FNM_CASEFOLD
 633   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 634      also present on *BSD platforms, and possibly elsewhere.  */
 635   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 636 #else
 637   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 638   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 639   char *strcopy = (char *) alloca (strlen (string) + 1);
 640   char *p;
 641   for (p = patcopy; *pattern; pattern++, p++)
 642     *p = TOLOWER (*pattern);
 643   *p = '\0';
 644   for (p = strcopy; *string; string++, p++)
 645     *p = TOLOWER (*string);
 646   *p = '\0';
 647   return fnmatch (patcopy, strcopy, flags);
 648 #endif
 649 }
 650
 651 static bool in_acclist (const char *const *, const char *, bool);
 652
 653 /* Determine whether a file is acceptable to be followed, according to
 654    lists of patterns to accept/reject.  */
 655 bool
 656 acceptable (const char *s)
 657 {
 658   int l = strlen (s);
 659
 660   while (l && s[l] != '/')
 661     --l;
 662   if (s[l] == '/')
 663     s += (l + 1);
 664   if (opt.accepts)
 665     {
 666       if (opt.rejects)
 667         return (in_acclist ((const char *const *)opt.accepts, s, true)
 668                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 669       else
 670         return in_acclist ((const char *const *)opt.accepts, s, true);
 671     }
 672   else if (opt.rejects)
 673     return !in_acclist ((const char *const *)opt.rejects, s, true);
 674   return true;
 675 }
 676
 677 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 678    `/something', frontcmp() will return true only if S2 begins with
 679    `/something'.  */
 680 bool
 681 frontcmp (const char *s1, const char *s2)
 682 {
 683   if (!opt.ignore_case)
 684     for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2)
 685       ;
 686   else
 687     for (; *s1 && *s2 && (TOLOWER (*s1) == TOLOWER (*s2)); ++s1, ++s2)
 688       ;
 689   return *s1 == '\0';
 690 }
 691
 692 /* Iterate through STRLIST, and return the first element that matches
 693    S, through wildcards or front comparison (as appropriate).  */
 694 static char *
 695 proclist (char **strlist, const char *s)
 696 {
 697   char **x;
 698   int (*matcher) (const char *, const char *, int)
 699     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 700
 701   for (x = strlist; *x; x++)
 702     {
 703       /* Remove leading '/' */
 704       char *p = *x + (**x == '/');
 705       if (has_wildcards_p (p))
 706         {
 707           if (matcher (p, s, FNM_PATHNAME) == 0)
 708             break;
 709         }
 710       else
 711         {
 712           if (frontcmp (p, s))
 713             break;
 714         }
 715     }
 716   return *x;
 717 }
 718
 719 /* Returns whether DIRECTORY is acceptable for download, wrt the
 720    include/exclude lists.
 721
 722    The leading `/' is ignored in paths; relative and absolute paths
 723    may be freely intermixed.  */
 724
 725 bool
 726 accdir (const char *directory)
 727 {
 728   /* Remove starting '/'.  */
 729   if (*directory == '/')
 730     ++directory;
 731   if (opt.includes)
 732     {
 733       if (!proclist (opt.includes, directory))
 734         return false;
 735     }
 736   if (opt.excludes)
 737     {
 738       if (proclist (opt.excludes, directory))
 739         return false;
 740     }
 741   return true;
 742 }
 743
 744 /* Return true if STRING ends with TAIL.  For instance:
 745
 746    match_tail ("abc", "bc", false)  -> 1
 747    match_tail ("abc", "ab", false)  -> 0
 748    match_tail ("abc", "abc", false) -> 1
 749
 750    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 751
 752 bool
 753 match_tail (const char *string, const char *tail, bool fold_case)
 754 {
 755   int i, j;
 756
 757   /* We want this to be fast, so we code two loops, one with
 758      case-folding, one without. */
 759
 760   if (!fold_case)
 761     {
 762       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 763         if (string[i] != tail[j])
 764           break;
 765     }
 766   else
 767     {
 768       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 769         if (TOLOWER (string[i]) != TOLOWER (tail[j]))
 770           break;
 771     }
 772
 773   /* If the tail was exhausted, the match was succesful.  */
 774   if (j == -1)
 775     return true;
 776   else
 777     return false;
 778 }
 779
 780 /* Checks whether string S matches each element of ACCEPTS.  A list
 781    element are matched either with fnmatch() or match_tail(),
 782    according to whether the element contains wildcards or not.
 783
 784    If the BACKWARD is false, don't do backward comparison -- just compare
 785    them normally.  */
 786 static bool
 787 in_acclist (const char *const *accepts, const char *s, bool backward)
 788 {
 789   for (; *accepts; accepts++)
 790     {
 791       if (has_wildcards_p (*accepts))
 792         {
 793           int res = opt.ignore_case
 794             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
 795           /* fnmatch returns 0 if the pattern *does* match the string.  */
 796           if (res == 0)
 797             return true;
 798         }
 799       else
 800         {
 801           if (backward)
 802             {
 803               if (match_tail (s, *accepts, opt.ignore_case))
 804                 return true;
 805             }
 806           else
 807             {
 808               int cmp = opt.ignore_case
 809                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
 810               if (cmp == 0)
 811                 return true;
 812             }
 813         }
 814     }
 815   return false;
 816 }
 817
 818 /* Return the location of STR's suffix (file extension).  Examples:
 819    suffix ("foo.bar")       -> "bar"
 820    suffix ("foo.bar.baz")   -> "baz"
 821    suffix ("/foo/bar")      -> NULL
 822    suffix ("/foo.bar/baz")  -> NULL  */
 823 char *
 824 suffix (const char *str)
 825 {
 826   int i;
 827
 828   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 829     ;
 830
 831   if (str[i++] == '.')
 832     return (char *)str + i;
 833   else
 834     return NULL;
 835 }
 836
 837 /* Return true if S contains globbing wildcards (`*', `?', `[' or
 838    `]').  */
 839
 840 bool
 841 has_wildcards_p (const char *s)
 842 {
 843   for (; *s; s++)
 844     if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
 845       return true;
 846   return false;
 847 }
 848
 849 /* Return true if FNAME ends with a typical HTML suffix.  The
 850    following (case-insensitive) suffixes are presumed to be HTML
 851    files:
 852
 853      html
 854      htm
 855      ?html (`?' matches one character)
 856
 857    #### CAVEAT.  This is not necessarily a good indication that FNAME
 858    refers to a file that contains HTML!  */
 859 bool
 860 has_html_suffix_p (const char *fname)
 861 {
 862   char *suf;
 863
 864   if ((suf = suffix (fname)) == NULL)
 865     return false;
 866   if (!strcasecmp (suf, "html"))
 867     return true;
 868   if (!strcasecmp (suf, "htm"))
 869     return true;
 870   if (suf[0] && !strcasecmp (suf + 1, "html"))
 871     return true;
 872   return false;
 873 }
 874
 875 /* Read a line from FP and return the pointer to freshly allocated
 876    storage.  The storage space is obtained through malloc() and should
 877    be freed with free() when it is no longer needed.
 878
 879    The length of the line is not limited, except by available memory.
 880    The newline character at the end of line is retained.  The line is
 881    terminated with a zero character.
 882
 883    After end-of-file is encountered without anything being read, NULL
 884    is returned.  NULL is also returned on error.  To distinguish
 885    between these two cases, use the stdio function ferror().  */
 886
 887 char *
 888 read_whole_line (FILE *fp)
 889 {
 890   int length = 0;
 891   int bufsize = 82;
 892   char *line = xmalloc (bufsize);
 893
 894   while (fgets (line + length, bufsize - length, fp))
 895     {
 896       length += strlen (line + length);
 897       if (length == 0)
 898         /* Possible for example when reading from a binary file where
 899            a line begins with \0.  */
 900         continue;
 901
 902       if (line[length - 1] == '\n')
 903         break;
 904
 905       /* fgets() guarantees to read the whole line, or to use up the
 906          space we've given it.  We can double the buffer
 907          unconditionally.  */
 908       bufsize <<= 1;
 909       line = xrealloc (line, bufsize);
 910     }
 911   if (length == 0 || ferror (fp))
 912     {
 913       xfree (line);
 914       return NULL;
 915     }
 916   if (length + 1 < bufsize)
 917     /* Relieve the memory from our exponential greediness.  We say
 918        `length + 1' because the terminating \0 is not included in
 919        LENGTH.  We don't need to zero-terminate the string ourselves,
 920        though, because fgets() does that.  */
 921     line = xrealloc (line, length + 1);
 922   return line;
 923 }
 924 \f
 925 /* Read FILE into memory.  A pointer to `struct file_memory' are
 926    returned; use struct element `content' to access file contents, and
 927    the element `length' to know the file length.  `content' is *not*
 928    zero-terminated, and you should *not* read or write beyond the [0,
 929    length) range of characters.
 930
 931    After you are done with the file contents, call read_file_free to
 932    release the memory.
 933
 934    Depending on the operating system and the type of file that is
 935    being read, read_file() either mmap's the file into memory, or
 936    reads the file into the core using read().
 937
 938    If file is named "-", fileno(stdin) is used for reading instead.
 939    If you want to read from a real file named "-", use "./-" instead.  */
 940
 941 struct file_memory *
 942 read_file (const char *file)
 943 {
 944   int fd;
 945   struct file_memory *fm;
 946   long size;
 947   bool inhibit_close = false;
 948
 949   /* Some magic in the finest tradition of Perl and its kin: if FILE
 950      is "-", just use stdin.  */
 951   if (HYPHENP (file))
 952     {
 953       fd = fileno (stdin);
 954       inhibit_close = true;
 955       /* Note that we don't inhibit mmap() in this case.  If stdin is
 956          redirected from a regular file, mmap() will still work.  */
 957     }
 958   else
 959     fd = open (file, O_RDONLY);
 960   if (fd < 0)
 961     return NULL;
 962   fm = xnew (struct file_memory);
 963
 964 #ifdef HAVE_MMAP
 965   {
 966     struct_fstat buf;
 967     if (fstat (fd, &buf) < 0)
 968       goto mmap_lose;
 969     fm->length = buf.st_size;
 970     /* NOTE: As far as I know, the callers of this function never
 971        modify the file text.  Relying on this would enable us to
 972        specify PROT_READ and MAP_SHARED for a marginal gain in
 973        efficiency, but at some cost to generality.  */
 974     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
 975                         MAP_PRIVATE, fd, 0);
 976     if (fm->content == (char *)MAP_FAILED)
 977       goto mmap_lose;
 978     if (!inhibit_close)
 979       close (fd);
 980
 981     fm->mmap_p = 1;
 982     return fm;
 983   }
 984
 985  mmap_lose:
 986   /* The most common reason why mmap() fails is that FD does not point
 987      to a plain file.  However, it's also possible that mmap() doesn't
 988      work for a particular type of file.  Therefore, whenever mmap()
 989      fails, we just fall back to the regular method.  */
 990 #endif /* HAVE_MMAP */
 991
 992   fm->length = 0;
 993   size = 512;                   /* number of bytes fm->contents can
 994                                    hold at any given time. */
 995   fm->content = xmalloc (size);
 996   while (1)
 997     {
 998       wgint nread;
 999       if (fm->length > size / 2)
1000         {
1001           /* #### I'm not sure whether the whole exponential-growth
1002              thing makes sense with kernel read.  On Linux at least,
1003              read() refuses to read more than 4K from a file at a
1004              single chunk anyway.  But other Unixes might optimize it
1005              better, and it doesn't *hurt* anything, so I'm leaving
1006              it.  */
1007
1008           /* Normally, we grow SIZE exponentially to make the number
1009              of calls to read() and realloc() logarithmic in relation
1010              to file size.  However, read() can read an amount of data
1011              smaller than requested, and it would be unreasonable to
1012              double SIZE every time *something* was read.  Therefore,
1013              we double SIZE only when the length exceeds half of the
1014              entire allocated size.  */
1015           size <<= 1;
1016           fm->content = xrealloc (fm->content, size);
1017         }
1018       nread = read (fd, fm->content + fm->length, size - fm->length);
1019       if (nread > 0)
1020         /* Successful read. */
1021         fm->length += nread;
1022       else if (nread < 0)
1023         /* Error. */
1024         goto lose;
1025       else
1026         /* EOF */
1027         break;
1028     }
1029   if (!inhibit_close)
1030     close (fd);
1031   if (size > fm->length && fm->length != 0)
1032     /* Due to exponential growth of fm->content, the allocated region
1033        might be much larger than what is actually needed.  */
1034     fm->content = xrealloc (fm->content, fm->length);
1035   fm->mmap_p = 0;
1036   return fm;
1037
1038  lose:
1039   if (!inhibit_close)
1040     close (fd);
1041   xfree (fm->content);
1042   xfree (fm);
1043   return NULL;
1044 }
1045
1046 /* Release the resources held by FM.  Specifically, this calls
1047    munmap() or xfree() on fm->content, depending whether mmap or
1048    malloc/read were used to read in the file.  It also frees the
1049    memory needed to hold the FM structure itself.  */
1050
1051 void
1052 read_file_free (struct file_memory *fm)
1053 {
1054 #ifdef HAVE_MMAP
1055   if (fm->mmap_p)
1056     {
1057       munmap (fm->content, fm->length);
1058     }
1059   else
1060 #endif
1061     {
1062       xfree (fm->content);
1063     }
1064   xfree (fm);
1065 }
1066 \f
1067 /* Free the pointers in a NULL-terminated vector of pointers, then
1068    free the pointer itself.  */
1069 void
1070 free_vec (char **vec)
1071 {
1072   if (vec)
1073     {
1074       char **p = vec;
1075       while (*p)
1076         xfree (*p++);
1077       xfree (vec);
1078     }
1079 }
1080
1081 /* Append vector V2 to vector V1.  The function frees V2 and
1082    reallocates V1 (thus you may not use the contents of neither
1083    pointer after the call).  If V1 is NULL, V2 is returned.  */
1084 char **
1085 merge_vecs (char **v1, char **v2)
1086 {
1087   int i, j;
1088
1089   if (!v1)
1090     return v2;
1091   if (!v2)
1092     return v1;
1093   if (!*v2)
1094     {
1095       /* To avoid j == 0 */
1096       xfree (v2);
1097       return v1;
1098     }
1099   /* Count v1.  */
1100   for (i = 0; v1[i]; i++)
1101     ;
1102   /* Count v2.  */
1103   for (j = 0; v2[j]; j++)
1104     ;
1105   /* Reallocate v1.  */
1106   v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1107   memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1108   xfree (v2);
1109   return v1;
1110 }
1111
1112 /* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1113    is allocated as needed.  Return the new value of the vector. */
1114
1115 char **
1116 vec_append (char **vec, const char *str)
1117 {
1118   int cnt;                      /* count of vector elements, including
1119                                    the one we're about to append */
1120   if (vec != NULL)
1121     {
1122       for (cnt = 0; vec[cnt]; cnt++)
1123         ;
1124       ++cnt;
1125     }
1126   else
1127     cnt = 1;
1128   /* Reallocate the array to fit the new element and the NULL. */
1129   vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1130   /* Append a copy of STR to the vector. */
1131   vec[cnt - 1] = xstrdup (str);
1132   vec[cnt] = NULL;
1133   return vec;
1134 }
1135 \f
1136 /* Sometimes it's useful to create "sets" of strings, i.e. special
1137    hash tables where you want to store strings as keys and merely
1138    query for their existence.  Here is a set of utility routines that
1139    makes that transparent.  */
1140
1141 void
1142 string_set_add (struct hash_table *ht, const char *s)
1143 {
1144   /* First check whether the set element already exists.  If it does,
1145      do nothing so that we don't have to free() the old element and
1146      then strdup() a new one.  */
1147   if (hash_table_contains (ht, s))
1148     return;
1149
1150   /* We use "1" as value.  It provides us a useful and clear arbitrary
1151      value, and it consumes no memory -- the pointers to the same
1152      string "1" will be shared by all the key-value pairs in all `set'
1153      hash tables.  */
1154   hash_table_put (ht, xstrdup (s), "1");
1155 }
1156
1157 /* Synonym for hash_table_contains... */
1158
1159 int
1160 string_set_contains (struct hash_table *ht, const char *s)
1161 {
1162   return hash_table_contains (ht, s);
1163 }
1164
1165 static int
1166 string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
1167 {
1168   char ***arrayptr = (char ***) arg;
1169   *(*arrayptr)++ = (char *) key;
1170   return 0;
1171 }
1172
1173 /* Convert the specified string set to array.  ARRAY should be large
1174    enough to hold hash_table_count(ht) char pointers.  */
1175
1176 void string_set_to_array (struct hash_table *ht, char **array)
1177 {
1178   hash_table_map (ht, string_set_to_array_mapper, &array);
1179 }
1180
1181 static int
1182 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1183 {
1184   xfree (key);
1185   return 0;
1186 }
1187
1188 void
1189 string_set_free (struct hash_table *ht)
1190 {
1191   hash_table_map (ht, string_set_free_mapper, NULL);
1192   hash_table_destroy (ht);
1193 }
1194
1195 static int
1196 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1197 {
1198   xfree (key);
1199   xfree (value);
1200   return 0;
1201 }
1202
1203 /* Another utility function: call free() on all keys and values of HT.  */
1204
1205 void
1206 free_keys_and_values (struct hash_table *ht)
1207 {
1208   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1209 }
1210
1211 \f
1212 /* Get grouping data, the separator and grouping info, by calling
1213    localeconv().  The information is cached after the first call to
1214    the function.
1215
1216    In locales that don't set a thousand separator (such as the "C"
1217    locale), this forces it to be ",".  We are now only showing
1218    thousand separators in one place, so this shouldn't be a problem in
1219    practice.  */
1220
1221 static void
1222 get_grouping_data (const char **sep, const char **grouping)
1223 {
1224   static const char *cached_sep;
1225   static const char *cached_grouping;
1226   static bool initialized;
1227   if (!initialized)
1228     {
1229       /* Get the grouping info from the locale. */
1230       struct lconv *lconv = localeconv ();
1231       cached_sep = lconv->thousands_sep;
1232       cached_grouping = lconv->grouping;
1233       if (!*cached_sep)
1234         {
1235           /* Many locales (such as "C" or "hr_HR") don't specify
1236              grouping, which we still want to use it for legibility.
1237              In those locales set the sep char to ',', unless that
1238              character is used for decimal point, in which case set it
1239              to ".".  */
1240           if (*lconv->decimal_point != ',')
1241             cached_sep = ",";
1242           else
1243             cached_sep = ".";
1244           cached_grouping = "\x03";
1245         }
1246       initialized = true;
1247     }
1248   *sep = cached_sep;
1249   *grouping = cached_grouping;
1250 }
1251
1252 /* Return a printed representation of N with thousand separators.
1253    This should respect locale settings, with the exception of the "C"
1254    locale which mandates no separator, but we use one anyway.
1255
1256    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1257    the separators because it's too non-portable, and it's hard to test
1258    for this feature at configure time.  Besides, it wouldn't work in
1259    the "C" locale, which many Unix users still work in.  */
1260
1261 const char *
1262 with_thousand_seps (wgint n)
1263 {
1264   static char outbuf[48];
1265   char *p = outbuf + sizeof outbuf;
1266
1267   /* Info received from locale */
1268   const char *grouping, *sep;
1269   int seplen;
1270
1271   /* State information */
1272   int i = 0, groupsize;
1273   const char *atgroup;
1274
1275   bool negative = n < 0;
1276
1277   /* Initialize grouping data. */
1278   get_grouping_data (&sep, &grouping);
1279   seplen = strlen (sep);
1280   atgroup = grouping;
1281   groupsize = *atgroup++;
1282
1283   /* This will overflow on WGINT_MIN, but we're not using this to
1284      print negative numbers anyway.  */
1285   if (negative)
1286     n = -n;
1287
1288   /* Write the number into the buffer, backwards, inserting the
1289      separators as necessary.  */
1290   *--p = '\0';
1291   while (1)
1292     {
1293       *--p = n % 10 + '0';
1294       n /= 10;
1295       if (n == 0)
1296         break;
1297       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1298       if (++i == groupsize)
1299         {
1300           if (seplen == 1)
1301             *--p = *sep;
1302           else
1303             memcpy (p -= seplen, sep, seplen);
1304           i = 0;
1305           if (*atgroup)
1306             groupsize = *atgroup++;
1307         }
1308     }
1309   if (negative)
1310     *--p = '-';
1311
1312   return p;
1313 }
1314
1315 /* N, a byte quantity, is converted to a human-readable abberviated
1316    form a la sizes printed by `ls -lh'.  The result is written to a
1317    static buffer, a pointer to which is returned.
1318
1319    Unlike `with_thousand_seps', this approximates to the nearest unit.
1320    Quoting GNU libit: "Most people visually process strings of 3-4
1321    digits effectively, but longer strings of digits are more prone to
1322    misinterpretation.  Hence, converting to an abbreviated form
1323    usually improves readability."
1324
1325    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1326    original computer-related meaning of "powers of 1024".  Powers of
1327    1000 would be useless since Wget already displays sizes with
1328    thousand separators.  We don't use the "*bibyte" names invented in
1329    1998, and seldom used in practice.  Wikipedia's entry on kilobyte
1330    discusses this in some detail.  */
1331
1332 char *
1333 human_readable (HR_NUMTYPE n)
1334 {
1335   /* These suffixes are compatible with those of GNU `ls -lh'. */
1336   static char powers[] =
1337     {
1338       'K',                      /* kilobyte, 2^10 bytes */
1339       'M',                      /* megabyte, 2^20 bytes */
1340       'G',                      /* gigabyte, 2^30 bytes */
1341       'T',                      /* terabyte, 2^40 bytes */
1342       'P',                      /* petabyte, 2^50 bytes */
1343       'E',                      /* exabyte,  2^60 bytes */
1344     };
1345   static char buf[8];
1346   int i;
1347
1348   /* If the quantity is smaller than 1K, just print it. */
1349   if (n < 1024)
1350     {
1351       snprintf (buf, sizeof (buf), "%d", (int) n);
1352       return buf;
1353     }
1354
1355   /* Loop over powers, dividing N with 1024 in each iteration.  This
1356      works unchanged for all sizes of wgint, while still avoiding
1357      non-portable `long double' arithmetic.  */
1358   for (i = 0; i < countof (powers); i++)
1359     {
1360       /* At each iteration N is greater than the *subsequent* power.
1361          That way N/1024.0 produces a decimal number in the units of
1362          *this* power.  */
1363       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1364         {
1365           double val = n / 1024.0;
1366           /* Print values smaller than 10 with one decimal digits, and
1367              others without any decimals.  */
1368           snprintf (buf, sizeof (buf), "%.*f%c",
1369                     val < 10 ? 1 : 0, val, powers[i]);
1370           return buf;
1371         }
1372       n /= 1024;
1373     }
1374   return NULL;                  /* unreached */
1375 }
1376
1377 /* Count the digits in the provided number.  Used to allocate space
1378    when printing numbers.  */
1379
1380 int
1381 numdigit (wgint number)
1382 {
1383   int cnt = 1;
1384   if (number < 0)
1385     ++cnt;                      /* accomodate '-' */
1386   while ((number /= 10) != 0)
1387     ++cnt;
1388   return cnt;
1389 }
1390
1391 #define PR(mask) *p++ = n / (mask) + '0'
1392
1393 /* DIGITS_<D> is used to print a D-digit number and should be called
1394    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1395    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1396    Recursively this continues until DIGITS_1 is invoked.  */
1397
1398 #define DIGITS_1(mask) PR (mask)
1399 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1400 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1401 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1402 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1403 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1404 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1405 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1406 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1407 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1408
1409 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1410
1411 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1412 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1413 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1414 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1415 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1416 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1417 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1418 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1419 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1420
1421 /* SPRINTF_WGINT is used by number_to_string to handle pathological
1422    cases and to portably support strange sizes of wgint.  Ideally this
1423    would just use "%j" and intmax_t, but many systems don't support
1424    it, so it's used only if nothing else works.  */
1425 #if SIZEOF_LONG >= SIZEOF_WGINT
1426 # define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n))
1427 #elif SIZEOF_LONG_LONG >= SIZEOF_WGINT
1428 # define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n))
1429 #elif defined(WINDOWS)
1430 # define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n))
1431 #else
1432 # define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n))
1433 #endif
1434
1435 /* Shorthand for casting to wgint. */
1436 #define W wgint
1437
1438 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1439    `sprintf(buffer, "%lld", (long long) number)', only typically much
1440    faster and portable to machines without long long.
1441
1442    The speedup may make a difference in programs that frequently
1443    convert numbers to strings.  Some implementations of sprintf,
1444    particularly the one in GNU libc, have been known to be extremely
1445    slow when converting integers to strings.
1446
1447    Return the pointer to the location where the terminating zero was
1448    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1449    function is done.)
1450
1451    BUFFER should be big enough to accept as many bytes as you expect
1452    the number to take up.  On machines with 64-bit longs the maximum
1453    needed size is 24 bytes.  That includes the digits needed for the
1454    largest 64-bit number, the `-' sign in case it's negative, and the
1455    terminating '\0'.  */
1456
1457 char *
1458 number_to_string (char *buffer, wgint number)
1459 {
1460   char *p = buffer;
1461   wgint n = number;
1462
1463 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1464   /* We are running in a strange or misconfigured environment.  Let
1465      sprintf cope with it.  */
1466   SPRINTF_WGINT (buffer, n);
1467   p += strlen (buffer);
1468 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1469
1470   if (n < 0)
1471     {
1472       if (n < -WGINT_MAX)
1473         {
1474           /* -n would overflow.  Have sprintf deal with this.  */
1475           SPRINTF_WGINT (buffer, n);
1476           p += strlen (buffer);
1477           return p;
1478         }
1479
1480       *p++ = '-';
1481       n = -n;
1482     }
1483
1484   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1485      way printing any N is fully open-coded without a loop or jump.
1486      (Also see description of DIGITS_*.)  */
1487
1488   if      (n < 10)                       DIGITS_1 (1);
1489   else if (n < 100)                      DIGITS_2 (10);
1490   else if (n < 1000)                     DIGITS_3 (100);
1491   else if (n < 10000)                    DIGITS_4 (1000);
1492   else if (n < 100000)                   DIGITS_5 (10000);
1493   else if (n < 1000000)                  DIGITS_6 (100000);
1494   else if (n < 10000000)                 DIGITS_7 (1000000);
1495   else if (n < 100000000)                DIGITS_8 (10000000);
1496   else if (n < 1000000000)               DIGITS_9 (100000000);
1497 #if SIZEOF_WGINT == 4
1498   /* wgint is 32 bits wide: no number has more than 10 digits. */
1499   else                                   DIGITS_10 (1000000000);
1500 #else
1501   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1502      Constants are constructed by compile-time multiplication to avoid
1503      dealing with different notations for 64-bit constants
1504      (nL/nLL/nI64, depending on the compiler and architecture).  */
1505   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1506   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1507   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1508   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1509   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1510   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1511   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1512   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1513   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1514   else                                   DIGITS_19 (1000000000*(W)1000000000);
1515 #endif
1516
1517   *p = '\0';
1518 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1519
1520   return p;
1521 }
1522
1523 #undef PR
1524 #undef W
1525 #undef SPRINTF_WGINT
1526 #undef DIGITS_1
1527 #undef DIGITS_2
1528 #undef DIGITS_3
1529 #undef DIGITS_4
1530 #undef DIGITS_5
1531 #undef DIGITS_6
1532 #undef DIGITS_7
1533 #undef DIGITS_8
1534 #undef DIGITS_9
1535 #undef DIGITS_10
1536 #undef DIGITS_11
1537 #undef DIGITS_12
1538 #undef DIGITS_13
1539 #undef DIGITS_14
1540 #undef DIGITS_15
1541 #undef DIGITS_16
1542 #undef DIGITS_17
1543 #undef DIGITS_18
1544 #undef DIGITS_19
1545
1546 #define RING_SIZE 3
1547
1548 /* Print NUMBER to a statically allocated string and return a pointer
1549    to the printed representation.
1550
1551    This function is intended to be used in conjunction with printf.
1552    It is hard to portably print wgint values:
1553     a) you cannot use printf("%ld", number) because wgint can be long
1554        long on 32-bit machines with LFS.
1555     b) you cannot use printf("%lld", number) because NUMBER could be
1556        long on 32-bit machines without LFS, or on 64-bit machines,
1557        which do not require LFS.  Also, Windows doesn't support %lld.
1558     c) you cannot use printf("%j", (int_max_t) number) because not all
1559        versions of printf support "%j", the most notable being the one
1560        on Windows.
1561     d) you cannot #define WGINT_FMT to the appropriate format and use
1562        printf(WGINT_FMT, number) because that would break translations
1563        for user-visible messages, such as printf("Downloaded: %d
1564        bytes\n", number).
1565
1566    What you should use instead is printf("%s", number_to_static_string
1567    (number)).
1568
1569    CAVEAT: since the function returns pointers to static data, you
1570    must be careful to copy its result before calling it again.
1571    However, to make it more useful with printf, the function maintains
1572    an internal ring of static buffers to return.  That way things like
1573    printf("%s %s", number_to_static_string (num1),
1574    number_to_static_string (num2)) work as expected.  Three buffers
1575    are currently used, which means that "%s %s %s" will work, but "%s
1576    %s %s %s" won't.  If you need to print more than three wgints,
1577    bump the RING_SIZE (or rethink your message.)  */
1578
1579 char *
1580 number_to_static_string (wgint number)
1581 {
1582   static char ring[RING_SIZE][24];
1583   static int ringpos;
1584   char *buf = ring[ringpos];
1585   number_to_string (buf, number);
1586   ringpos = (ringpos + 1) % RING_SIZE;
1587   return buf;
1588 }
1589 \f
1590 /* Determine the width of the terminal we're running on.  If that's
1591    not possible, return 0.  */
1592
1593 int
1594 determine_screen_width (void)
1595 {
1596   /* If there's a way to get the terminal size using POSIX
1597      tcgetattr(), somebody please tell me.  */
1598 #ifdef TIOCGWINSZ
1599   int fd;
1600   struct winsize wsz;
1601
1602   if (opt.lfilename != NULL)
1603     return 0;
1604
1605   fd = fileno (stderr);
1606   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1607     return 0;                   /* most likely ENOTTY */
1608
1609   return wsz.ws_col;
1610 #elif defined(WINDOWS)
1611   CONSOLE_SCREEN_BUFFER_INFO csbi;
1612   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1613     return 0;
1614   return csbi.dwSize.X;
1615 #else  /* neither TIOCGWINSZ nor WINDOWS */
1616   return 0;
1617 #endif /* neither TIOCGWINSZ nor WINDOWS */
1618 }
1619 \f
1620 /* Whether the rnd system (either rand or [dl]rand48) has been
1621    seeded.  */
1622 static int rnd_seeded;
1623
1624 /* Return a random number between 0 and MAX-1, inclusive.
1625
1626    If the system does not support lrand48 and MAX is greater than the
1627    value of RAND_MAX+1 on the system, the returned value will be in
1628    the range [0, RAND_MAX].  This may be fixed in a future release.
1629    The random number generator is seeded automatically the first time
1630    it is called.
1631
1632    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1633    for cryptography.  It is only meant to be used in situations where
1634    quality of the random numbers returned doesn't really matter.  */
1635
1636 int
1637 random_number (int max)
1638 {
1639 #ifdef HAVE_DRAND48
1640   if (!rnd_seeded)
1641     {
1642       srand48 ((long) time (NULL) ^ (long) getpid ());
1643       rnd_seeded = 1;
1644     }
1645   return lrand48 () % max;
1646 #else  /* not HAVE_DRAND48 */
1647
1648   double bounded;
1649   int rnd;
1650   if (!rnd_seeded)
1651     {
1652       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1653       rnd_seeded = 1;
1654     }
1655   rnd = rand ();
1656
1657   /* Like rand() % max, but uses the high-order bits for better
1658      randomness on architectures where rand() is implemented using a
1659      simple congruential generator.  */
1660
1661   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1662   return (int) bounded;
1663
1664 #endif /* not HAVE_DRAND48 */
1665 }
1666
1667 /* Return a random uniformly distributed floating point number in the
1668    [0, 1) range.  Uses drand48 where available, and a really lame
1669    kludge elsewhere.  */
1670
1671 double
1672 random_float (void)
1673 {
1674 #ifdef HAVE_DRAND48
1675   if (!rnd_seeded)
1676     {
1677       srand48 ((long) time (NULL) ^ (long) getpid ());
1678       rnd_seeded = 1;
1679     }
1680   return drand48 ();
1681 #else  /* not HAVE_DRAND48 */
1682   return (  random_number (10000) / 10000.0
1683           + random_number (10000) / (10000.0 * 10000.0)
1684           + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1685           + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1686 #endif /* not HAVE_DRAND48 */
1687 }
1688 \f
1689 /* Implementation of run_with_timeout, a generic timeout-forcing
1690    routine for systems with Unix-like signal handling.  */
1691
1692 #ifdef USE_SIGNAL_TIMEOUT
1693 # ifdef HAVE_SIGSETJMP
1694 #  define SETJMP(env) sigsetjmp (env, 1)
1695
1696 static sigjmp_buf run_with_timeout_env;
1697
1698 static void
1699 abort_run_with_timeout (int sig)
1700 {
1701   assert (sig == SIGALRM);
1702   siglongjmp (run_with_timeout_env, -1);
1703 }
1704 # else /* not HAVE_SIGSETJMP */
1705 #  define SETJMP(env) setjmp (env)
1706
1707 static jmp_buf run_with_timeout_env;
1708
1709 static void
1710 abort_run_with_timeout (int sig)
1711 {
1712   assert (sig == SIGALRM);
1713   /* We don't have siglongjmp to preserve the set of blocked signals;
1714      if we longjumped out of the handler at this point, SIGALRM would
1715      remain blocked.  We must unblock it manually. */
1716   int mask = siggetmask ();
1717   mask &= ~sigmask (SIGALRM);
1718   sigsetmask (mask);
1719
1720   /* Now it's safe to longjump. */
1721   longjmp (run_with_timeout_env, -1);
1722 }
1723 # endif /* not HAVE_SIGSETJMP */
1724
1725 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1726    setitimer where available, alarm otherwise.
1727
1728    TIMEOUT should be non-zero.  If the timeout value is so small that
1729    it would be rounded to zero, it is rounded to the least legal value
1730    instead (1us for setitimer, 1s for alarm).  That ensures that
1731    SIGALRM will be delivered in all cases.  */
1732
1733 static void
1734 alarm_set (double timeout)
1735 {
1736 #ifdef ITIMER_REAL
1737   /* Use the modern itimer interface. */
1738   struct itimerval itv;
1739   xzero (itv);
1740   itv.it_value.tv_sec = (long) timeout;
1741   itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1742   if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1743     /* Ensure that we wait for at least the minimum interval.
1744        Specifying zero would mean "wait forever".  */
1745     itv.it_value.tv_usec = 1;
1746   setitimer (ITIMER_REAL, &itv, NULL);
1747 #else  /* not ITIMER_REAL */
1748   /* Use the old alarm() interface. */
1749   int secs = (int) timeout;
1750   if (secs == 0)
1751     /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1752        because alarm(0) means "never deliver the alarm", i.e. "wait
1753        forever", which is not what someone who specifies a 0.5s
1754        timeout would expect.  */
1755     secs = 1;
1756   alarm (secs);
1757 #endif /* not ITIMER_REAL */
1758 }
1759
1760 /* Cancel the alarm set with alarm_set. */
1761
1762 static void
1763 alarm_cancel (void)
1764 {
1765 #ifdef ITIMER_REAL
1766   struct itimerval disable;
1767   xzero (disable);
1768   setitimer (ITIMER_REAL, &disable, NULL);
1769 #else  /* not ITIMER_REAL */
1770   alarm (0);
1771 #endif /* not ITIMER_REAL */
1772 }
1773
1774 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1775    seconds.  Returns true if the function was interrupted with a
1776    timeout, false otherwise.
1777
1778    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1779    using setitimer() or alarm().  The timeout is enforced by
1780    longjumping out of the SIGALRM handler.  This has several
1781    advantages compared to the traditional approach of relying on
1782    signals causing system calls to exit with EINTR:
1783
1784      * The callback function is *forcibly* interrupted after the
1785        timeout expires, (almost) regardless of what it was doing and
1786        whether it was in a syscall.  For example, a calculation that
1787        takes a long time is interrupted as reliably as an IO
1788        operation.
1789
1790      * It works with both SYSV and BSD signals because it doesn't
1791        depend on the default setting of SA_RESTART.
1792
1793      * It doesn't require special handler setup beyond a simple call
1794        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1795        optional.)
1796
1797    The only downside is that, if FUN allocates internal resources that
1798    are normally freed prior to exit from the functions, they will be
1799    lost in case of timeout.  */
1800
1801 bool
1802 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1803 {
1804   int saved_errno;
1805
1806   if (timeout == 0)
1807     {
1808       fun (arg);
1809       return false;
1810     }
1811
1812   signal (SIGALRM, abort_run_with_timeout);
1813   if (SETJMP (run_with_timeout_env) != 0)
1814     {
1815       /* Longjumped out of FUN with a timeout. */
1816       signal (SIGALRM, SIG_DFL);
1817       return true;
1818     }
1819   alarm_set (timeout);
1820   fun (arg);
1821
1822   /* Preserve errno in case alarm() or signal() modifies it. */
1823   saved_errno = errno;
1824   alarm_cancel ();
1825   signal (SIGALRM, SIG_DFL);
1826   errno = saved_errno;
1827
1828   return false;
1829 }
1830
1831 #else  /* not USE_SIGNAL_TIMEOUT */
1832
1833 #ifndef WINDOWS
1834 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
1835    define it under Windows, because Windows has its own version of
1836    run_with_timeout that uses threads.  */
1837
1838 int
1839 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1840 {
1841   fun (arg);
1842   return false;
1843 }
1844 #endif /* not WINDOWS */
1845 #endif /* not USE_SIGNAL_TIMEOUT */
1846 \f
1847 #ifndef WINDOWS
1848
1849 /* Sleep the specified amount of seconds.  On machines without
1850    nanosleep(), this may sleep shorter if interrupted by signals.  */
1851
1852 void
1853 xsleep (double seconds)
1854 {
1855 #ifdef HAVE_NANOSLEEP
1856   /* nanosleep is the preferred interface because it offers high
1857      accuracy and, more importantly, because it allows us to reliably
1858      restart receiving a signal such as SIGWINCH.  (There was an
1859      actual Debian bug report about --limit-rate malfunctioning while
1860      the terminal was being resized.)  */
1861   struct timespec sleep, remaining;
1862   sleep.tv_sec = (long) seconds;
1863   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1864   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1865     /* If nanosleep has been interrupted by a signal, adjust the
1866        sleeping period and return to sleep.  */
1867     sleep = remaining;
1868 #elif defined(HAVE_USLEEP)
1869   /* If usleep is available, use it in preference to select.  */
1870   if (seconds >= 1)
1871     {
1872       /* On some systems, usleep cannot handle values larger than
1873          1,000,000.  If the period is larger than that, use sleep
1874          first, then add usleep for subsecond accuracy.  */
1875       sleep (seconds);
1876       seconds -= (long) seconds;
1877     }
1878   usleep (seconds * 1000000);
1879 #else /* fall back select */
1880   /* Note that, although Windows supports select, it can't be used to
1881      implement sleeping because Winsock's select doesn't implement
1882      timeout when it is passed NULL pointers for all fd sets.  (But it
1883      does under Cygwin, which implements Unix-compatible select.)  */
1884   struct timeval sleep;
1885   sleep.tv_sec = (long) seconds;
1886   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
1887   select (0, NULL, NULL, NULL, &sleep);
1888   /* If select returns -1 and errno is EINTR, it means we were
1889      interrupted by a signal.  But without knowing how long we've
1890      actually slept, we can't return to sleep.  Using gettimeofday to
1891      track sleeps is slow and unreliable due to clock skew.  */
1892 #endif
1893 }
1894
1895 #endif /* not WINDOWS */
1896
1897 /* Encode the string STR of length LENGTH to base64 format and place it
1898    to B64STORE.  The output will be \0-terminated, and must point to a
1899    writable buffer of at least 1+BASE64_LENGTH(length) bytes.  It
1900    returns the length of the resulting base64 data, not counting the
1901    terminating zero.
1902
1903    This implementation will not emit newlines after 76 characters of
1904    base64 data.  */
1905
1906 int
1907 base64_encode (const char *str, int length, char *b64store)
1908 {
1909   /* Conversion table.  */
1910   static char tbl[64] = {
1911     'A','B','C','D','E','F','G','H',
1912     'I','J','K','L','M','N','O','P',
1913     'Q','R','S','T','U','V','W','X',
1914     'Y','Z','a','b','c','d','e','f',
1915     'g','h','i','j','k','l','m','n',
1916     'o','p','q','r','s','t','u','v',
1917     'w','x','y','z','0','1','2','3',
1918     '4','5','6','7','8','9','+','/'
1919   };
1920   int i;
1921   const unsigned char *s = (const unsigned char *) str;
1922   char *p = b64store;
1923
1924   /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
1925   for (i = 0; i < length; i += 3)
1926     {
1927       *p++ = tbl[s[0] >> 2];
1928       *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
1929       *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
1930       *p++ = tbl[s[2] & 0x3f];
1931       s += 3;
1932     }
1933
1934   /* Pad the result if necessary...  */
1935   if (i == length + 1)
1936     *(p - 1) = '=';
1937   else if (i == length + 2)
1938     *(p - 1) = *(p - 2) = '=';
1939
1940   /* ...and zero-terminate it.  */
1941   *p = '\0';
1942
1943   return p - b64store;
1944 }
1945
1946 /* Store in C the next non-whitespace character from the string, or \0
1947    when end of string is reached.  */
1948 #define NEXT_CHAR(c, p) do {                    \
1949   c = (unsigned char) *p++;                     \
1950 } while (ISSPACE (c))
1951
1952 #define IS_ASCII(c) (((c) & 0x80) == 0)
1953
1954 /* Decode data from BASE64 (pointer to \0-terminated text) into memory
1955    pointed to by TO.  TO should be large enough to accomodate the
1956    decoded data, which is guaranteed to be less than strlen(base64).
1957
1958    Since TO is assumed to contain binary data, it is not
1959    NUL-terminated.  The function returns the length of the data
1960    written to TO.  -1 is returned in case of error caused by malformed
1961    base64 input.  */
1962
1963 int
1964 base64_decode (const char *base64, char *to)
1965 {
1966   /* Table of base64 values for first 128 characters.  Note that this
1967      assumes ASCII (but so does Wget in other places).  */
1968   static signed char base64_char_to_value[128] =
1969     {
1970       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
1971       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
1972       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
1973       -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
1974       -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
1975       54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
1976       -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
1977       5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
1978       15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
1979       25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
1980       29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
1981       39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
1982       49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
1983     };
1984 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
1985 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
1986
1987   const char *p = base64;
1988   char *q = to;
1989
1990   while (1)
1991     {
1992       unsigned char c;
1993       unsigned long value;
1994
1995       /* Process first byte of a quadruplet.  */
1996       NEXT_CHAR (c, p);
1997       if (!c)
1998         break;
1999       if (c == '=' || !IS_BASE64 (c))
2000         return -1;              /* illegal char while decoding base64 */
2001       value = BASE64_CHAR_TO_VALUE (c) << 18;
2002
2003       /* Process second byte of a quadruplet.  */
2004       NEXT_CHAR (c, p);
2005       if (!c)
2006         return -1;              /* premature EOF while decoding base64 */
2007       if (c == '=' || !IS_BASE64 (c))
2008         return -1;              /* illegal char while decoding base64 */
2009       value |= BASE64_CHAR_TO_VALUE (c) << 12;
2010       *q++ = value >> 16;
2011
2012       /* Process third byte of a quadruplet.  */
2013       NEXT_CHAR (c, p);
2014       if (!c)
2015         return -1;              /* premature EOF while decoding base64 */
2016       if (!IS_BASE64 (c))
2017         return -1;              /* illegal char while decoding base64 */
2018
2019       if (c == '=')
2020         {
2021           NEXT_CHAR (c, p);
2022           if (!c)
2023             return -1;          /* premature EOF while decoding base64 */
2024           if (c != '=')
2025             return -1;          /* padding `=' expected but not found */
2026           continue;
2027         }
2028
2029       value |= BASE64_CHAR_TO_VALUE (c) << 6;
2030       *q++ = 0xff & value >> 8;
2031
2032       /* Process fourth byte of a quadruplet.  */
2033       NEXT_CHAR (c, p);
2034       if (!c)
2035         return -1;              /* premature EOF while decoding base64 */
2036       if (c == '=')
2037         continue;
2038       if (!IS_BASE64 (c))
2039         return -1;              /* illegal char while decoding base64 */
2040
2041       value |= BASE64_CHAR_TO_VALUE (c);
2042       *q++ = 0xff & value;
2043     }
2044 #undef IS_BASE64
2045 #undef BASE64_CHAR_TO_VALUE
2046
2047   return q - to;
2048 }
2049
2050 #undef IS_ASCII
2051 #undef NEXT_CHAR
2052 \f
2053 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2054    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2055
2056 static void
2057 mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2058                     int (*cmpfun) (const void *, const void *))
2059 {
2060 #define ELT(array, pos) ((char *)(array) + (pos) * size)
2061   if (from < to)
2062     {
2063       size_t i, j, k;
2064       size_t mid = (to + from) / 2;
2065       mergesort_internal (base, temp, size, from, mid, cmpfun);
2066       mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2067       i = from;
2068       j = mid + 1;
2069       for (k = from; (i <= mid) && (j <= to); k++)
2070         if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2071           memcpy (ELT (temp, k), ELT (base, i++), size);
2072         else
2073           memcpy (ELT (temp, k), ELT (base, j++), size);
2074       while (i <= mid)
2075         memcpy (ELT (temp, k++), ELT (base, i++), size);
2076       while (j <= to)
2077         memcpy (ELT (temp, k++), ELT (base, j++), size);
2078       for (k = from; k <= to; k++)
2079         memcpy (ELT (base, k), ELT (temp, k), size);
2080     }
2081 #undef ELT
2082 }
2083
2084 /* Stable sort with interface exactly like standard library's qsort.
2085    Uses mergesort internally, allocating temporary storage with
2086    alloca.  */
2087
2088 void
2089 stable_sort (void *base, size_t nmemb, size_t size,
2090              int (*cmpfun) (const void *, const void *))
2091 {
2092   if (size > 1)
2093     {
2094       void *temp = alloca (nmemb * size * sizeof (void *));
2095       mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2096     }
2097 }
2098 \f
2099 /* Print a decimal number.  If it is equal to or larger than ten, the
2100    number is rounded.  Otherwise it is printed with one significant
2101    digit without trailing zeros and with no more than three fractional
2102    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2103    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2104
2105    This is useful for displaying durations because it provides
2106    order-of-magnitude information without unnecessary clutter --
2107    long-running downloads are shown without the fractional part, and
2108    short ones still retain one significant digit.  */
2109
2110 const char *
2111 print_decimal (double number)
2112 {
2113   static char buf[32];
2114   double n = number >= 0 ? number : -number;
2115
2116   if (n >= 9.95)
2117     /* Cut off at 9.95 because the below %.1f would round 9.96 to
2118        "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2119     snprintf (buf, sizeof buf, "%.0f", number);
2120   else if (n >= 0.95)
2121     snprintf (buf, sizeof buf, "%.1f", number);
2122   else if (n >= 0.001)
2123     snprintf (buf, sizeof buf, "%.1g", number);
2124   else if (n >= 0.0005)
2125     /* round [0.0005, 0.001) to 0.001 */
2126     snprintf (buf, sizeof buf, "%.3f", number);
2127   else
2128     /* print numbers close to 0 as 0, not 0.000 */
2129     strcpy (buf, "0");
2130
2131   return buf;
2132 }