sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #ifdef HAVE_UTIME_H
  46 # include <utime.h>
  47 #endif
  48 #include <errno.h>
  49 #include <fcntl.h>
  50 #include <assert.h>
  51 #include <stdarg.h>
  52 #include <locale.h>
  53
  54 #include <sys/time.h>
  55
  56
  57 /* For TIOCGWINSZ and friends: */
  58 #ifdef HAVE_SYS_IOCTL_H
  59 # include <sys/ioctl.h>
  60 #endif
  61 #ifdef HAVE_TERMIOS_H
  62 # include <termios.h>
  63 #endif
  64
  65 /* Needed for Unix version of run_with_timeout. */
  66 #include <signal.h>
  67 #include <setjmp.h>
  68
  69 #ifndef HAVE_SIGSETJMP
  70 /* If sigsetjmp is a macro, configure won't pick it up. */
  71 # ifdef sigsetjmp
  72 #  define HAVE_SIGSETJMP
  73 # endif
  74 #endif
  75
  76 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  77 # define USE_SIGNAL_TIMEOUT
  78 #endif
  79
  80 #include "utils.h"
  81 #include "hash.h"
  82
  83 #ifdef __VMS
  84 #include "vms.h"
  85 #endif /* def __VMS */
  86
  87 #ifdef TESTING
  88 #include "test.h"
  89 #endif
  90
  91 static void
  92 memfatal (const char *context, long attempted_size)
  93 {
  94   /* Make sure we don't try to store part of the log line, and thus
  95      call malloc.  */
  96   log_set_save_context (false);
  97
  98   /* We have different log outputs in different situations:
  99      1) output without bytes information
 100      2) output with bytes information  */
 101   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 102     {
 103       logprintf (LOG_ALWAYS,
 104                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 105                  exec_name, context);
 106     }
 107   else
 108     {
 109       logprintf (LOG_ALWAYS,
 110                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 111                  exec_name, context, attempted_size);
 112     }
 113
 114   exit (1);
 115 }
 116
 117 /* Character property table for (re-)escaping VMS ODS5 extended file
 118    names.  Note that this table ignores Unicode.
 119
 120    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 121
 122    ODS5 Invalid characters:
 123       C0 control codes (0x00 to 0x1F inclusive)
 124       Asterisk (*)
 125       Question mark (?)
 126
 127    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 128       Double quotation marks (")
 129       Backslash (\)
 130       Colon (:)
 131       Left angle bracket (<)
 132       Right angle bracket (>)
 133       Slash (/)
 134       Vertical bar (|)
 135
 136    Characters escaped by "^":
 137       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 138        @  [  \  ]  ^  `  {  |  }  ~
 139
 140    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 141    case.  Note that un-escaped < and > can also confuse a directory
 142    spec.
 143
 144    Characters put out as ^xx:
 145       7F (DEL)
 146       80-9F (C1 control characters)
 147       A0 (nonbreaking space)
 148       FF (Latin small letter y diaeresis)
 149
 150    Other cases:
 151       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 152
 153     Property table values:
 154       Normal escape:    1
 155       Space:            2
 156       Dot:              4
 157       Hex-hex escape:   8
 158       ODS2 normal:     16
 159       ODS2 lower case: 32
 160       Hex digit:       64
 161 */
 162
/* Indexed by character code (0-255).  Each entry is the sum of the
   property values listed above that apply to the character, e.g.
   80 = hex digit (64) + ODS2 normal (16), and
   96 = hex digit (64) + ODS2 lower case (32).  */
unsigned char char_prop[ 256] = {

/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,

/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,

/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,

/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,

/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,

/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,

/* 0x80 - 0x9F (C1 control characters): hex-hex escape.  */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0xA0 (nonbreaking space): hex-hex escape; 0xA1 - 0xAF: no properties.  */
    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0 - 0xEF: no properties.  */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0 - 0xFF: only 0xFF (y diaeresis) gets the hex-hex escape.  */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
};
 198
 199 /* Utility function: like xstrdup(), but also lowercases S.  */
 200
 201 char *
 202 xstrdup_lower (const char *s)
 203 {
 204   char *copy = xstrdup (s);
 205   char *p = copy;
 206   for (; *p; p++)
 207     *p = c_tolower (*p);
 208   return copy;
 209 }
 210
 211 /* Copy the string formed by two pointers (one on the beginning, other
 212    on the char after the last char) to a new, malloc-ed location.
 213    0-terminate it.  */
 214 char *
 215 strdupdelim (const char *beg, const char *end)
 216 {
 217   char *res = xmalloc (end - beg + 1);
 218   memcpy (res, beg, end - beg);
 219   res[end - beg] = '\0';
 220   return res;
 221 }
 222
 223 /* Parse a string containing comma-separated elements, and return a
 224    vector of char pointers with the elements.  Spaces following the
 225    commas are ignored.  */
 226 char **
 227 sepstring (const char *s)
 228 {
 229   char **res;
 230   const char *p;
 231   int i = 0;
 232
 233   if (!s || !*s)
 234     return NULL;
 235   res = NULL;
 236   p = s;
 237   while (*s)
 238     {
 239       if (*s == ',')
 240         {
 241           res = xrealloc (res, (i + 2) * sizeof (char *));
 242           res[i] = strdupdelim (p, s);
 243           res[++i] = NULL;
 244           ++s;
 245           /* Skip the blanks following the ','.  */
 246           while (c_isspace (*s))
 247             ++s;
 248           p = s;
 249         }
 250       else
 251         ++s;
 252     }
 253   res = xrealloc (res, (i + 2) * sizeof (char *));
 254   res[i] = strdupdelim (p, s);
 255   res[i + 1] = NULL;
 256   return res;
 257 }
 258 \f
 259 /* Like sprintf, but prints into a string of sufficient size freshly
 260    allocated with malloc, which is returned.  If unable to print due
 261    to invalid format, returns NULL.  Inability to allocate needed
 262    memory results in abort, as with xmalloc.  This is in spirit
 263    similar to the GNU/BSD extension asprintf, but somewhat easier to
 264    use.
 265
 266    Internally the function either calls vasprintf or loops around
 267    vsnprintf until the correct size is found.  Since Wget also ships a
 268    fallback implementation of vsnprintf, this should be portable.  */
 269
 270 /* Constant is using for limits memory allocation for text buffer.
 271    Applicable in situation when: vasprintf is not available in the system
 272    and vsnprintf return -1 when long line is truncated (in old versions of
 273    glibc and in other system where C99 doesn`t support) */
 274
 275 #define FMT_MAX_LENGTH 1048576
 276
 277 char *
 278 aprintf (const char *fmt, ...)
 279 {
 280 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 281   /* Use vasprintf. */
 282   int ret;
 283   va_list args;
 284   char *str;
 285   va_start (args, fmt);
 286   ret = vasprintf (&str, fmt, args);
 287   va_end (args);
 288   if (ret < 0 && errno == ENOMEM)
 289     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 290                                                       with xmalloc/xrealloc */
 291   else if (ret < 0)
 292     return NULL;
 293   return str;
 294 #else  /* not HAVE_VASPRINTF */
 295
 296   /* vasprintf is unavailable.  snprintf into a small buffer and
 297      resize it as necessary. */
 298   int size = 32;
 299   char *str = xmalloc (size);
 300
 301   /* #### This code will infloop and eventually abort in xrealloc if
 302      passed a FMT that causes snprintf to consistently return -1.  */
 303
 304   while (1)
 305     {
 306       int n;
 307       va_list args;
 308
 309       va_start (args, fmt);
 310       n = vsnprintf (str, size, fmt, args);
 311       va_end (args);
 312
 313       /* If the printing worked, return the string. */
 314       if (n > -1 && n < size)
 315         return str;
 316
 317       /* Else try again with a larger buffer. */
 318       if (n > -1)               /* C99 */
 319         size = n + 1;           /* precisely what is needed */
 320       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 321         {                               /* maybe we have some wrong
 322                                            format string? */
 323           logprintf (LOG_ALWAYS,
 324                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 325                        "aborting.\n"),
 326                      exec_name, size);  /* printout a log message */
 327           abort ();                     /* and abort... */
 328         }
 329       else
 330         {
 331           /* else, we continue to grow our
 332            * buffer: Twice the old size. */
 333           size <<= 1;
 334         }
 335       str = xrealloc (str, size);
 336     }
 337 #endif /* not HAVE_VASPRINTF */
 338 }
 339
 340 /* Concatenate the NULL-terminated list of string arguments into
 341    freshly allocated space.  */
 342
 343 char *
 344 concat_strings (const char *str0, ...)
 345 {
 346   va_list args;
 347   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 348   char *ret, *p;
 349
 350   const char *next_str;
 351   int total_length = 0;
 352   size_t argcount;
 353
 354   /* Calculate the length of and allocate the resulting string. */
 355
 356   argcount = 0;
 357   va_start (args, str0);
 358   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 359     {
 360       int len = strlen (next_str);
 361       if (argcount < countof (saved_lengths))
 362         saved_lengths[argcount++] = len;
 363       total_length += len;
 364     }
 365   va_end (args);
 366   p = ret = xmalloc (total_length + 1);
 367
 368   /* Copy the strings into the allocated space. */
 369
 370   argcount = 0;
 371   va_start (args, str0);
 372   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 373     {
 374       int len;
 375       if (argcount < countof (saved_lengths))
 376         len = saved_lengths[argcount++];
 377       else
 378         len = strlen (next_str);
 379       memcpy (p, next_str, len);
 380       p += len;
 381     }
 382   va_end (args);
 383   *p = '\0';
 384
 385   return ret;
 386 }
 387 \f
 388 /* Format the provided time according to the specified format.  The
 389    format is a string with format elements supported by strftime.  */
 390
 391 static char *
 392 fmttime (time_t t, const char *fmt)
 393 {
 394   static char output[32];
 395   struct tm *tm = localtime(&t);
 396   if (!tm)
 397     abort ();
 398   if (!strftime(output, sizeof(output), fmt, tm))
 399     abort ();
 400   return output;
 401 }
 402
 403 /* Return pointer to a static char[] buffer in which zero-terminated
 404    string-representation of TM (in form hh:mm:ss) is printed.
 405
 406    If TM is NULL, the current time will be used.  */
 407
 408 char *
 409 time_str (time_t t)
 410 {
 411   return fmttime(t, "%H:%M:%S");
 412 }
 413
 414 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 415
 416 char *
 417 datetime_str (time_t t)
 418 {
 419   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 420 }
 421 \f
 422 /* The Windows versions of the following two functions are defined in
 423    mswindows.c. On MSDOS this function should never be called. */
 424
 425 #ifdef __VMS
 426
 427 void
 428 fork_to_background (void)
 429 {
 430   return;
 431 }
 432
 433 #else /* def __VMS */
 434
 435 #if !defined(WINDOWS) && !defined(MSDOS)
 436 void
 437 fork_to_background (void)
 438 {
 439   pid_t pid;
 440   /* Whether we arrange our own version of opt.lfilename here.  */
 441   bool logfile_changed = false;
 442
 443   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 444     {
 445       /* We must create the file immediately to avoid either a race
 446          condition (which arises from using unique_name and failing to
 447          use fopen_excl) or lying to the user about the log file name
 448          (which arises from using unique_name, printing the name, and
 449          using fopen_excl later on.)  */
 450       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 451       if (new_log_fp)
 452         {
 453           logfile_changed = true;
 454           fclose (new_log_fp);
 455         }
 456     }
 457   pid = fork ();
 458   if (pid < 0)
 459     {
 460       /* parent, error */
 461       perror ("fork");
 462       exit (1);
 463     }
 464   else if (pid != 0)
 465     {
 466       /* parent, no error */
 467       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 468       if (logfile_changed)
 469         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 470       exit (0);                 /* #### should we use _exit()? */
 471     }
 472
 473   /* child: give up the privileges and keep running. */
 474   setsid ();
 475   freopen ("/dev/null", "r", stdin);
 476   freopen ("/dev/null", "w", stdout);
 477   freopen ("/dev/null", "w", stderr);
 478 }
 479 #endif /* !WINDOWS && !MSDOS */
 480
 481 #endif /* def __VMS [else] */
 482
 483 \f
 484 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 485    specified with TM.  The atime ("access time") is set to the current
 486    time.  */
 487
 488 void
 489 touch (const char *file, time_t tm)
 490 {
 491   struct timeval timevals[2];
 492
 493   timevals[0].tv_sec = time (NULL);
 494   timevals[0].tv_usec = 0L;
 495   timevals[1].tv_sec = tm;
 496   timevals[1].tv_usec = 0L;
 497
 498   if (utimes (file, timevals) == -1)
 499     logprintf (LOG_NOTQUIET, "utimes(%s): %s\n", file, strerror (errno));
 500 }
 501
 502 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 503    nothing under MS-Windows.  */
 504 int
 505 remove_link (const char *file)
 506 {
 507   int err = 0;
 508   struct_stat st;
 509
 510   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 511     {
 512       DEBUGP (("Unlinking %s (symlink).\n", file));
 513       err = unlink (file);
 514       if (err != 0)
 515         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 516                    quote (file), strerror (errno));
 517     }
 518   return err;
 519 }
 520
 521 /* Does FILENAME exist?  This is quite a lousy implementation, since
 522    it supplies no error codes -- only a yes-or-no answer.  Thus it
 523    will return that a file does not exist if, e.g., the directory is
 524    unreadable.  I don't mind it too much currently, though.  The
 525    proper way should, of course, be to have a third, error state,
 526    other than true/false, but that would introduce uncalled-for
 527    additional complexity to the callers.  */
 528 bool
 529 file_exists_p (const char *filename)
 530 {
 531 #ifdef HAVE_ACCESS
 532   return access (filename, F_OK) >= 0;
 533 #else
 534   struct_stat buf;
 535   return stat (filename, &buf) >= 0;
 536 #endif
 537 }
 538
 539 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 540    Returns 0 on error.  */
 541 bool
 542 file_non_directory_p (const char *path)
 543 {
 544   struct_stat buf;
 545   /* Use lstat() rather than stat() so that symbolic links pointing to
 546      directories can be identified correctly.  */
 547   if (lstat (path, &buf) != 0)
 548     return false;
 549   return S_ISDIR (buf.st_mode) ? false : true;
 550 }
 551
 552 /* Return the size of file named by FILENAME, or -1 if it cannot be
 553    opened or seeked into. */
 554 wgint
 555 file_size (const char *filename)
 556 {
 557 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 558   wgint size;
 559   /* We use fseek rather than stat to determine the file size because
 560      that way we can also verify that the file is readable without
 561      explicitly checking for permissions.  Inspired by the POST patch
 562      by Arnaud Wylie.  */
 563   FILE *fp = fopen (filename, "rb");
 564   if (!fp)
 565     return -1;
 566   fseeko (fp, 0, SEEK_END);
 567   size = ftello (fp);
 568   fclose (fp);
 569   return size;
 570 #else
 571   struct_stat st;
 572   if (stat (filename, &st) < 0)
 573     return -1;
 574   return st.st_size;
 575 #endif
 576 }
 577
 578 /* 2005-02-19 SMS.
 579    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 580    original name.  With the VMS file systems' versioning, everything
 581    should be fine, and appending ".NN" just causes trouble.
 582 */
 583
 584 #ifdef UNIQ_SEP
 585
 586 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 587    doesn't exist is found.  Return a freshly allocated copy of the
 588    unused file name.  */
 589
 590 static char *
 591 unique_name_1 (const char *prefix)
 592 {
 593   int count = 1;
 594   int plen = strlen (prefix);
 595   char *template = (char *)alloca (plen + 1 + 24);
 596   char *template_tail = template + plen;
 597
 598   memcpy (template, prefix, plen);
 599   *template_tail++ = UNIQ_SEP;
 600
 601   do
 602     number_to_string (template_tail, count++);
 603   while (file_exists_p (template));
 604
 605   return xstrdup (template);
 606 }
 607
 608 /* Return a unique file name, based on FILE.
 609
 610    More precisely, if FILE doesn't exist, it is returned unmodified.
 611    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 612    file name that doesn't exist is returned.
 613
 614    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 615
 616    The resulting file is not created, only verified that it didn't
 617    exist at the point in time when the function was called.
 618    Therefore, where security matters, don't rely that the file created
 619    by this function exists until you open it with O_EXCL or
 620    equivalent.
 621
 622    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 623    string.  Otherwise, it may return FILE if the file doesn't exist
 624    (and therefore doesn't need changing).  */
 625
 626 char *
 627 unique_name (const char *file, bool allow_passthrough)
 628 {
 629   /* If the FILE itself doesn't exist, return it without
 630      modification. */
 631   if (!file_exists_p (file))
 632     return allow_passthrough ? (char *)file : xstrdup (file);
 633
 634   /* Otherwise, find a numeric suffix that results in unused file name
 635      and return it.  */
 636   return unique_name_1 (file);
 637 }
 638
 639 #else /* def UNIQ_SEP */
 640
 641 /* Dummy unique_name() for VMS.  Return the original name as easily as
 642    possible.
 643 */
 644 char *
 645 unique_name (const char *file, bool allow_passthrough)
 646 {
 647   /* Return the FILE itself, without modification, irregardful. */
 648   return allow_passthrough ? (char *)file : xstrdup (file);
 649 }
 650
 651 #endif /* def UNIQ_SEP [else] */
 652
 653 /* Create a file based on NAME, except without overwriting an existing
 654    file with that name.  Providing O_EXCL is correctly implemented,
 655    this function does not have the race condition associated with
 656    opening the file returned by unique_name.  */
 657
 658 FILE *
 659 unique_create (const char *name, bool binary, char **opened_name)
 660 {
 661   /* unique file name, based on NAME */
 662   char *uname = unique_name (name, false);
 663   FILE *fp;
 664   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 665     {
 666       xfree (uname);
 667       uname = unique_name (name, false);
 668     }
 669   if (opened_name && fp != NULL)
 670     {
 671       if (fp)
 672         *opened_name = uname;
 673       else
 674         {
 675           *opened_name = NULL;
 676           xfree (uname);
 677         }
 678     }
 679   else
 680     xfree (uname);
 681   return fp;
 682 }
 683
 684 /* Open the file for writing, with the addition that the file is
 685    opened "exclusively".  This means that, if the file already exists,
 686    this function will *fail* and errno will be set to EEXIST.  If
 687    BINARY is set, the file will be opened in binary mode, equivalent
 688    to fopen's "wb".
 689
 690    If opening the file fails for any reason, including the file having
 691    previously existed, this function returns NULL and sets errno
 692    appropriately.  */
 693
 694 FILE *
 695 fopen_excl (const char *fname, int binary)
 696 {
 697   int fd;
 698 #ifdef O_EXCL
 699
 700 /* 2005-04-14 SMS.
 701    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 702    It also has file versions which obviate all the O_EXCL effort.
 703    O_TRUNC (something of a misnomer) requests a new version.
 704 */
 705 # ifdef __VMS
 706 /* Common open() optional arguments:
 707    sequential access only, access callback function.
 708 */
 709 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 710
 711   int open_id;
 712   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 713
 714   if (binary > 1)
 715     {
 716       open_id = 11;
 717       fd = open( fname,                 /* File name. */
 718        flags,                           /* Flags. */
 719        0777,                            /* Mode for default protection. */
 720        "ctx=bin,stm",                   /* Binary, stream access. */
 721        "rfm=stmlf",                     /* Stream_LF. */
 722        OPEN_OPT_ARGS);                  /* Access callback. */
 723     }
 724   else if (binary)
 725     {
 726       open_id = 12;
 727       fd = open( fname,                 /* File name. */
 728        flags,                           /* Flags. */
 729        0777,                            /* Mode for default protection. */
 730        "ctx=bin,stm",                   /* Binary, stream access. */
 731        "rfm=fix",                       /* Fixed-length, */
 732        "mrs=512",                       /* 512-byte records. */
 733        OPEN_OPT_ARGS);                  /* Access callback. */
 734     }
 735   else
 736     {
 737       open_id = 13;
 738       fd = open( fname,                 /* File name. */
 739        flags,                           /* Flags. */
 740        0777,                            /* Mode for default protection.
 741 */
 742        "rfm=stmlf",                     /* Stream_LF. */
 743        OPEN_OPT_ARGS);                  /* Access callback. */
 744     }
 745 # else /* def __VMS */
 746   int flags = O_WRONLY | O_CREAT | O_EXCL;
 747 # ifdef O_BINARY
 748   if (binary)
 749     flags |= O_BINARY;
 750 # endif
 751   fd = open (fname, flags, 0666);
 752 # endif /* def __VMS [else] */
 753
 754   if (fd < 0)
 755     return NULL;
 756   return fdopen (fd, binary ? "wb" : "w");
 757 #else  /* not O_EXCL */
 758   /* Manually check whether the file exists.  This is prone to race
 759      conditions, but systems without O_EXCL haven't deserved
 760      better.  */
 761   if (file_exists_p (fname))
 762     {
 763       errno = EEXIST;
 764       return NULL;
 765     }
 766   return fopen (fname, binary ? "wb" : "w");
 767 #endif /* not O_EXCL */
 768 }
 769 \f
 770 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 771    are missing, create them first.  In case any mkdir() call fails,
 772    return its error status.  Returns 0 on successful completion.
 773
 774    The behaviour of this function should be identical to the behaviour
 775    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 776 int
 777 make_directory (const char *directory)
 778 {
 779   int i, ret, quit = 0;
 780   char *dir;
 781
 782   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 783      function is unsafe if called with a read-only char *argument.  */
 784   STRDUP_ALLOCA (dir, directory);
 785
 786   /* If the first character of dir is '/', skip it (and thus enable
 787      creation of absolute-pathname directories.  */
 788   for (i = (*dir == '/'); 1; ++i)
 789     {
 790       for (; dir[i] && dir[i] != '/'; i++)
 791         ;
 792       if (!dir[i])
 793         quit = 1;
 794       dir[i] = '\0';
 795       /* Check whether the directory already exists.  Allow creation of
 796          of intermediate directories to fail, as the initial path components
 797          are not necessarily directories!  */
 798       if (!file_exists_p (dir))
 799         ret = mkdir (dir, 0777);
 800       else
 801         ret = 0;
 802       if (quit)
 803         break;
 804       else
 805         dir[i] = '/';
 806     }
 807   return ret;
 808 }
 809
 810 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 811    should be a file name.
 812
 813    file_merge("/foo/bar", "baz")  => "/foo/baz"
 814    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 815    file_merge("foo", "bar")       => "bar"
 816
 817    In other words, it's a simpler and gentler version of uri_merge.  */
 818
 819 char *
 820 file_merge (const char *base, const char *file)
 821 {
 822   char *result;
 823   const char *cut = (const char *)strrchr (base, '/');
 824
 825   if (!cut)
 826     return xstrdup (file);
 827
 828   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 829   memcpy (result, base, cut - base);
 830   result[cut - base] = '/';
 831   strcpy (result + (cut - base) + 1, file);
 832
 833   return result;
 834 }
 835 \f
 836 /* Like fnmatch, but performs a case-insensitive match.  */
 837
 838 int
 839 fnmatch_nocase (const char *pattern, const char *string, int flags)
 840 {
 841 #ifdef FNM_CASEFOLD
 842   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 843      also present on *BSD platforms, and possibly elsewhere.  */
 844   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 845 #else
 846   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 847   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 848   char *strcopy = (char *) alloca (strlen (string) + 1);
 849   char *p;
 850   for (p = patcopy; *pattern; pattern++, p++)
 851     *p = c_tolower (*pattern);
 852   *p = '\0';
 853   for (p = strcopy; *string; string++, p++)
 854     *p = c_tolower (*string);
 855   *p = '\0';
 856   return fnmatch (patcopy, strcopy, flags);
 857 #endif
 858 }
 859
 860 static bool in_acclist (const char *const *, const char *, bool);
 861
 862 /* Determine whether a file is acceptable to be followed, according to
 863    lists of patterns to accept/reject.  */
 864 bool
 865 acceptable (const char *s)
 866 {
 867   int l = strlen (s);
 868
 869   while (l && s[l] != '/')
 870     --l;
 871   if (s[l] == '/')
 872     s += (l + 1);
 873   if (opt.accepts)
 874     {
 875       if (opt.rejects)
 876         return (in_acclist ((const char *const *)opt.accepts, s, true)
 877                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 878       else
 879         return in_acclist ((const char *const *)opt.accepts, s, true);
 880     }
 881   else if (opt.rejects)
 882     return !in_acclist ((const char *const *)opt.rejects, s, true);
 883   return true;
 884 }
 885
 886 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 887    will return true if and only if D2 begins with `/something/' or is exactly
 888    '/something'.  */
 889 bool
 890 subdir_p (const char *d1, const char *d2)
 891 {
 892   if (*d1 == '\0')
 893     return true;
 894   if (!opt.ignore_case)
 895     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 896       ;
 897   else
 898     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 899       ;
 900
 901   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 902 }
 903
 904 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 905    first element that matches DIR, through wildcards or front comparison (as
 906    appropriate).  */
 907 static bool
 908 dir_matches_p (char **dirlist, const char *dir)
 909 {
 910   char **x;
 911   int (*matcher) (const char *, const char *, int)
 912     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 913
 914   for (x = dirlist; *x; x++)
 915     {
 916       /* Remove leading '/' */
 917       char *p = *x + (**x == '/');
 918       if (has_wildcards_p (p))
 919         {
 920           if (matcher (p, dir, FNM_PATHNAME) == 0)
 921             break;
 922         }
 923       else
 924         {
 925           if (subdir_p (p, dir))
 926             break;
 927         }
 928     }
 929
 930   return *x ? true : false;
 931 }
 932
 933 /* Returns whether DIRECTORY is acceptable for download, wrt the
 934    include/exclude lists.
 935
 936    The leading `/' is ignored in paths; relative and absolute paths
 937    may be freely intermixed.  */
 938
 939 bool
 940 accdir (const char *directory)
 941 {
 942   /* Remove starting '/'.  */
 943   if (*directory == '/')
 944     ++directory;
 945   if (opt.includes)
 946     {
 947       if (!dir_matches_p (opt.includes, directory))
 948         return false;
 949     }
 950   if (opt.excludes)
 951     {
 952       if (dir_matches_p (opt.excludes, directory))
 953         return false;
 954     }
 955   return true;
 956 }
 957
 958 /* Return true if STRING ends with TAIL.  For instance:
 959
 960    match_tail ("abc", "bc", false)  -> 1
 961    match_tail ("abc", "ab", false)  -> 0
 962    match_tail ("abc", "abc", false) -> 1
 963
 964    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 965
 966 bool
 967 match_tail (const char *string, const char *tail, bool fold_case)
 968 {
 969   int i, j;
 970
 971   /* We want this to be fast, so we code two loops, one with
 972      case-folding, one without. */
 973
 974   if (!fold_case)
 975     {
 976       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 977         if (string[i] != tail[j])
 978           break;
 979     }
 980   else
 981     {
 982       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
 983         if (c_tolower (string[i]) != c_tolower (tail[j]))
 984           break;
 985     }
 986
 987   /* If the tail was exhausted, the match was succesful.  */
 988   if (j == -1)
 989     return true;
 990   else
 991     return false;
 992 }
 993
 994 /* Checks whether string S matches each element of ACCEPTS.  A list
 995    element are matched either with fnmatch() or match_tail(),
 996    according to whether the element contains wildcards or not.
 997
 998    If the BACKWARD is false, don't do backward comparison -- just compare
 999    them normally.  */
1000 static bool
1001 in_acclist (const char *const *accepts, const char *s, bool backward)
1002 {
1003   for (; *accepts; accepts++)
1004     {
1005       if (has_wildcards_p (*accepts))
1006         {
1007           int res = opt.ignore_case
1008             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1009           /* fnmatch returns 0 if the pattern *does* match the string.  */
1010           if (res == 0)
1011             return true;
1012         }
1013       else
1014         {
1015           if (backward)
1016             {
1017               if (match_tail (s, *accepts, opt.ignore_case))
1018                 return true;
1019             }
1020           else
1021             {
1022               int cmp = opt.ignore_case
1023                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1024               if (cmp == 0)
1025                 return true;
1026             }
1027         }
1028     }
1029   return false;
1030 }
1031
/* Return a pointer to the suffix (file extension) of STR, i.e. to the
   character following the last '.', provided no '/' comes after that
   dot.  Return NULL when there is no such suffix.  The result points
   into STR itself.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  const char *cursor = str + strlen (str);

  /* Walk backwards from the terminator until a '.' or '/' is found or
     the first character is reached.  */
  while (cursor != str && *cursor != '/' && *cursor != '.')
    --cursor;

  if (*cursor != '.')
    return NULL;
  return (char *) (cursor + 1);
}
1050
/* Return true if S contains at least one globbing metacharacter:
   `*', `?', `[' or `]'.  */

bool
has_wildcards_p (const char *s)
{
  const char *p = s;

  while (*p != '\0')
    {
      switch (*p)
        {
        case '*':
        case '?':
        case '[':
        case ']':
          return true;
        default:
          break;
        }
      p++;
    }
  return false;
}
1062
/* Return true if the suffix of FNAME (as determined by suffix())
   looks like an HTML one.  The comparison ignores case and accepts:

     html
     htm
     ?html (`?' stands for any single character)

   #### CAVEAT.  The suffix alone is not a reliable indication that
   FNAME actually contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return (strcasecmp (ext, "html") == 0
          || strcasecmp (ext, "htm") == 0
          /* One arbitrary character followed by "html".  */
          || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}
1088
/* Read one line from FP into a freshly allocated buffer and return
   it.  The caller owns the buffer and must free it when done.

   The line length is limited only by available memory.  The trailing
   newline, if one was read, is kept, and the result is
   zero-terminated.

   NULL is returned when end-of-file is hit before anything was read,
   and also when a read error occurred; use ferror() to tell the two
   cases apart.  */

char *
read_whole_line (FILE *fp)
{
  int used = 0;                 /* bytes of the line collected so far */
  int capacity = 82;            /* current size of BUFFER */
  char *buffer = xmalloc (capacity);

  for (;;)
    {
      if (fgets (buffer + used, capacity - used, fp) == NULL)
        break;

      used += strlen (buffer + used);
      if (used == 0)
        /* fgets stored an empty string; this can happen with binary
           input where a line starts with \0.  Just read again.  */
        continue;

      if (buffer[used - 1] == '\n')
        break;

      /* No newline yet: fgets either filled the space it was given or
         will find nothing more to read, so doubling the buffer is
         always safe.  */
      capacity <<= 1;
      buffer = xrealloc (buffer, capacity);
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buffer);
      return NULL;
    }

  /* Give back the slack left over from doubling.  USED excludes the
     terminating \0, hence the `+ 1'; fgets has already written the
     terminator.  */
  if (used + 1 < capacity)
    buffer = xrealloc (buffer, used + 1);
  return buffer;
}
1138 \f
1139 /* Read FILE into memory.  A pointer to `struct file_memory' are
1140    returned; use struct element `content' to access file contents, and
1141    the element `length' to know the file length.  `content' is *not*
1142    zero-terminated, and you should *not* read or write beyond the [0,
1143    length) range of characters.
1144
1145    After you are done with the file contents, call wget_read_file_free to
1146    release the memory.
1147
1148    Depending on the operating system and the type of file that is
1149    being read, wget_read_file() either mmap's the file into memory, or
1150    reads the file into the core using read().
1151
1152    If file is named "-", fileno(stdin) is used for reading instead.
1153    If you want to read from a real file named "-", use "./-" instead.  */
1154
1155 struct file_memory *
1156 wget_read_file (const char *file)
1157 {
1158   int fd;
1159   struct file_memory *fm;
1160   long size;
1161   bool inhibit_close = false;
1162
1163   /* Some magic in the finest tradition of Perl and its kin: if FILE
1164      is "-", just use stdin.  */
1165   if (HYPHENP (file))
1166     {
1167       fd = fileno (stdin);
1168       inhibit_close = true;
1169       /* Note that we don't inhibit mmap() in this case.  If stdin is
1170          redirected from a regular file, mmap() will still work.  */
1171     }
1172   else
1173     fd = open (file, O_RDONLY);
1174   if (fd < 0)
1175     return NULL;
1176   fm = xnew (struct file_memory);
1177
1178 #ifdef HAVE_MMAP
1179   {
1180     struct_fstat buf;
1181     if (fstat (fd, &buf) < 0)
1182       goto mmap_lose;
1183     fm->length = buf.st_size;
1184     /* NOTE: As far as I know, the callers of this function never
1185        modify the file text.  Relying on this would enable us to
1186        specify PROT_READ and MAP_SHARED for a marginal gain in
1187        efficiency, but at some cost to generality.  */
1188     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1189                         MAP_PRIVATE, fd, 0);
1190     if (fm->content == (char *)MAP_FAILED)
1191       goto mmap_lose;
1192     if (!inhibit_close)
1193       close (fd);
1194
1195     fm->mmap_p = 1;
1196     return fm;
1197   }
1198
1199  mmap_lose:
1200   /* The most common reason why mmap() fails is that FD does not point
1201      to a plain file.  However, it's also possible that mmap() doesn't
1202      work for a particular type of file.  Therefore, whenever mmap()
1203      fails, we just fall back to the regular method.  */
1204 #endif /* HAVE_MMAP */
1205
1206   fm->length = 0;
1207   size = 512;                   /* number of bytes fm->contents can
1208                                    hold at any given time. */
1209   fm->content = xmalloc (size);
1210   while (1)
1211     {
1212       wgint nread;
1213       if (fm->length > size / 2)
1214         {
1215           /* #### I'm not sure whether the whole exponential-growth
1216              thing makes sense with kernel read.  On Linux at least,
1217              read() refuses to read more than 4K from a file at a
1218              single chunk anyway.  But other Unixes might optimize it
1219              better, and it doesn't *hurt* anything, so I'm leaving
1220              it.  */
1221
1222           /* Normally, we grow SIZE exponentially to make the number
1223              of calls to read() and realloc() logarithmic in relation
1224              to file size.  However, read() can read an amount of data
1225              smaller than requested, and it would be unreasonable to
1226              double SIZE every time *something* was read.  Therefore,
1227              we double SIZE only when the length exceeds half of the
1228              entire allocated size.  */
1229           size <<= 1;
1230           fm->content = xrealloc (fm->content, size);
1231         }
1232       nread = read (fd, fm->content + fm->length, size - fm->length);
1233       if (nread > 0)
1234         /* Successful read. */
1235         fm->length += nread;
1236       else if (nread < 0)
1237         /* Error. */
1238         goto lose;
1239       else
1240         /* EOF */
1241         break;
1242     }
1243   if (!inhibit_close)
1244     close (fd);
1245   if (size > fm->length && fm->length != 0)
1246     /* Due to exponential growth of fm->content, the allocated region
1247        might be much larger than what is actually needed.  */
1248     fm->content = xrealloc (fm->content, fm->length);
1249   fm->mmap_p = 0;
1250   return fm;
1251
1252  lose:
1253   if (!inhibit_close)
1254     close (fd);
1255   xfree (fm->content);
1256   xfree (fm);
1257   return NULL;
1258 }
1259
1260 /* Release the resources held by FM.  Specifically, this calls
1261    munmap() or xfree() on fm->content, depending whether mmap or
1262    malloc/read were used to read in the file.  It also frees the
1263    memory needed to hold the FM structure itself.  */
1264
1265 void
1266 wget_read_file_free (struct file_memory *fm)
1267 {
1268 #ifdef HAVE_MMAP
1269   if (fm->mmap_p)
1270     {
1271       munmap (fm->content, fm->length);
1272     }
1273   else
1274 #endif
1275     {
1276       xfree (fm->content);
1277     }
1278   xfree (fm);
1279 }
1280 \f
/* Free every string in the NULL-terminated vector VEC, then the
   vector itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;

  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1294
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.

   V1 is reallocated and the V2 array is freed (its strings are moved,
   not copied), so neither pointer may be used after the call; use the
   return value instead.  If V1 is NULL, V2 is returned unchanged; if
   V2 is NULL, V1 is returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 holds nothing but its terminator (j would be 0): there is
         nothing to copy, just release the array.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Grow v1 to hold both sets of strings plus the terminator.  The
     element size is taken from the pointed-to object (a `char *'),
     matching the memcpy below.  */
  v1 = xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy v2's pointers, including its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1325
/* Add a freshly allocated copy of STR to the end of the
   NULL-terminated vector VEC.  VEC may be NULL, in which case a new
   vector is created.  The (possibly moved) vector is returned.  */

char **
vec_append (char **vec, const char *str)
{
  int old_count = 0;            /* strings currently stored in VEC */

  if (vec != NULL)
    while (vec[old_count] != NULL)
      old_count++;

  /* Make room for the existing strings, the new one, and the NULL
     terminator.  */
  vec = xrealloc (vec, (old_count + 2) * sizeof (char *));
  vec[old_count] = xstrdup (str);
  vec[old_count + 1] = NULL;
  return vec;
}
1349 \f
/* Sometimes it's useful to create "sets" of strings, i.e. special
   hash tables in which strings are stored as keys and merely queried
   for their existence.  Here is a set of utility routines that make
   that transparent.  */
1354
/* Add S to the string set HT.  The set stores its own copy of S; if
   S is already a member, nothing happens.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Checking membership first avoids duplicating a string whose key
     is already in the table.  The value is the constant "1" for every
     element: it is an arbitrary marker, and being a shared literal it
     costs no per-element memory.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1370
/* Return the result of hash_table_contains for S in the string set
   HT; this is merely a set-flavored name for that function.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  const int found = hash_table_contains (ht, s);
  return found;
}
1378
1379 /* Convert the specified string set to array.  ARRAY should be large
1380    enough to hold hash_table_count(ht) char pointers.  */
1381
1382 void string_set_to_array (struct hash_table *ht, char **array)
1383 {
1384   hash_table_iterator iter;
1385   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1386     *array++ = iter.key;
1387 }
1388
1389 /* Free the string set.  This frees both the storage allocated for
1390    keys and the actual hash table.  (hash_table_destroy would only
1391    destroy the hash table.)  */
1392
1393 void
1394 string_set_free (struct hash_table *ht)
1395 {
1396   hash_table_iterator iter;
1397   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1398     xfree (iter.key);
1399   hash_table_destroy (ht);
1400 }
1401
1402 /* Utility function: simply call xfree() on all keys and values of HT.  */
1403
1404 void
1405 free_keys_and_values (struct hash_table *ht)
1406 {
1407   hash_table_iterator iter;
1408   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1409     {
1410       xfree (iter.key);
1411       xfree (iter.value);
1412     }
1413 }
1414 \f
/* Obtain the digit grouping data used for thousand separators: the
   separator string is stored in *SEP and the grouping description in
   *GROUPING.  The data comes from localeconv() and is cached after
   the first call.

   In locales that do not define a thousand separator (such as the
   "C" locale) a separator is forced: "," normally, or "." when ","
   is the decimal point, with groups of three digits.  Thousand
   separators are shown in only one place, so this should not be a
   problem in practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *saved_sep;
  static const char *saved_grouping;
  static bool ready;

  if (!ready)
    {
      /* Ask the locale for its grouping conventions.  */
      const struct lconv *loc = localeconv ();
      const char *separator = loc->thousands_sep;
      const char *groups = loc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths cannot be counted here, so only single-byte
         separators are acceptable; anything longer is discarded and
         replaced by the default chosen below.  */
      if (strlen (separator) > 1)
        separator = "";
#endif

      if (*separator == '\0')
        {
          /* Many locales (such as "C" or "hr_HR") specify no
             separator, but grouping is still wanted for legibility.
             Use ',' unless it already serves as the decimal point, in
             which case use '.'.  */
          separator = (*loc->decimal_point == ',') ? "." : ",";
          groups = "\x03";
        }

      saved_sep = separator;
      saved_grouping = groups;
      ready = true;
    }

  *sep = saved_sep;
  *grouping = saved_grouping;
}
1460
1461 /* Return a printed representation of N with thousand separators.
1462    This should respect locale settings, with the exception of the "C"
1463    locale which mandates no separator, but we use one anyway.
1464
1465    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1466    the separators because it's too non-portable, and it's hard to test
1467    for this feature at configure time.  Besides, it wouldn't display
1468    separators in the "C" locale, still used by many Unix users.  */
1469
1470 const char *
1471 with_thousand_seps (wgint n)
1472 {
1473   static char outbuf[48];
1474   char *p = outbuf + sizeof outbuf;
1475
1476   /* Info received from locale */
1477   const char *grouping, *sep;
1478   int seplen;
1479
1480   /* State information */
1481   int i = 0, groupsize;
1482   const char *atgroup;
1483
1484   bool negative = n < 0;
1485
1486   /* Initialize grouping data. */
1487   get_grouping_data (&sep, &grouping);
1488   seplen = strlen (sep);
1489   atgroup = grouping;
1490   groupsize = *atgroup++;
1491
1492   /* This would overflow on WGINT_MIN, but printing negative numbers
1493      is not an important goal of this fuinction.  */
1494   if (negative)
1495     n = -n;
1496
1497   /* Write the number into the buffer, backwards, inserting the
1498      separators as necessary.  */
1499   *--p = '\0';
1500   while (1)
1501     {
1502       *--p = n % 10 + '0';
1503       n /= 10;
1504       if (n == 0)
1505         break;
1506       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1507       if (++i == groupsize)
1508         {
1509           if (seplen == 1)
1510             *--p = *sep;
1511           else
1512             memcpy (p -= seplen, sep, seplen);
1513           i = 0;
1514           if (*atgroup)
1515             groupsize = *atgroup++;
1516         }
1517     }
1518   if (negative)
1519     *--p = '-';
1520
1521   return p;
1522 }
1523
1524 /* N, a byte quantity, is converted to a human-readable abberviated
1525    form a la sizes printed by `ls -lh'.  The result is written to a
1526    static buffer, a pointer to which is returned.
1527
1528    Unlike `with_thousand_seps', this approximates to the nearest unit.
1529    Quoting GNU libit: "Most people visually process strings of 3-4
1530    digits effectively, but longer strings of digits are more prone to
1531    misinterpretation.  Hence, converting to an abbreviated form
1532    usually improves readability."
1533
1534    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1535    original computer-related meaning of "powers of 1024".  We don't
1536    use the "*bibyte" names invented in 1998, and seldom used in
1537    practice.  Wikipedia's entry on "binary prefix" discusses this in
1538    some detail.  */
1539
1540 char *
1541 human_readable (HR_NUMTYPE n)
1542 {
1543   /* These suffixes are compatible with those of GNU `ls -lh'. */
1544   static char powers[] =
1545     {
1546       'K',                      /* kilobyte, 2^10 bytes */
1547       'M',                      /* megabyte, 2^20 bytes */
1548       'G',                      /* gigabyte, 2^30 bytes */
1549       'T',                      /* terabyte, 2^40 bytes */
1550       'P',                      /* petabyte, 2^50 bytes */
1551       'E',                      /* exabyte,  2^60 bytes */
1552     };
1553   static char buf[8];
1554   size_t i;
1555
1556   /* If the quantity is smaller than 1K, just print it. */
1557   if (n < 1024)
1558     {
1559       snprintf (buf, sizeof (buf), "%d", (int) n);
1560       return buf;
1561     }
1562
1563   /* Loop over powers, dividing N with 1024 in each iteration.  This
1564      works unchanged for all sizes of wgint, while still avoiding
1565      non-portable `long double' arithmetic.  */
1566   for (i = 0; i < countof (powers); i++)
1567     {
1568       /* At each iteration N is greater than the *subsequent* power.
1569          That way N/1024.0 produces a decimal number in the units of
1570          *this* power.  */
1571       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1572         {
1573           double val = n / 1024.0;
1574           /* Print values smaller than 10 with one decimal digits, and
1575              others without any decimals.  */
1576           snprintf (buf, sizeof (buf), "%.*f%c",
1577                     val < 10 ? 1 : 0, val, powers[i]);
1578           return buf;
1579         }
1580       n /= 1024;
1581     }
1582   return NULL;                  /* unreached */
1583 }
1584
1585 /* Count the digits in the provided number.  Used to allocate space
1586    when printing numbers.  */
1587
1588 int
1589 numdigit (wgint number)
1590 {
1591   int cnt = 1;
1592   if (number < 0)
1593     ++cnt;                      /* accomodate '-' */
1594   while ((number /= 10) != 0)
1595     ++cnt;
1596   return cnt;
1597 }
1598
/* PR stores the digit character for n / mask at *p and advances p.
   It expects variables named `p' (output pointer) and `n' (the number
   being printed) to exist where it is expanded.  */
#define PR(mask) *p++ = n / (mask) + '0'

/* DIGITS_<D> is used to print a D-digit number and should be called
   with mask==10^(D-1).  It prints n/mask (the first digit), reducing
   n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
   Recursively this continues until DIGITS_1 is invoked.  */

#define DIGITS_1(mask) PR (mask)
#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit wgints. */

#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)

/* Shorthand for casting to wgint: written as (W) in front of a
   constant to force the arithmetic to be done in wgint.  */
#define W wgint
1631
1632 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1633    `sprintf(buffer, "%lld", (long long) number)', only typically much
1634    faster and portable to machines without long long.
1635
1636    The speedup may make a difference in programs that frequently
1637    convert numbers to strings.  Some implementations of sprintf,
1638    particularly the one in some versions of GNU libc, have been known
1639    to be quite slow when converting integers to strings.
1640
1641    Return the pointer to the location where the terminating zero was
1642    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1643    function is done.)
1644
1645    BUFFER should be large enough to accept as many bytes as you expect
1646    the number to take up.  On machines with 64-bit wgints the maximum
1647    needed size is 24 bytes.  That includes the digits needed for the
1648    largest 64-bit number, the `-' sign in case it's negative, and the
1649    terminating '\0'.  */
1650
1651 char *
1652 number_to_string (char *buffer, wgint number)
1653 {
1654   char *p = buffer;
1655   wgint n = number;
1656
1657   int last_digit_char = 0;
1658
1659 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1660   /* We are running in a very strange environment.  Leave the correct
1661      printing to sprintf.  */
1662   p += sprintf (buf, "%j", (intmax_t) (n));
1663 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1664
1665   if (n < 0)
1666     {
1667       if (n < -WGINT_MAX)
1668         {
1669           /* n = -n would overflow because -n would evaluate to a
1670              wgint value larger than WGINT_MAX.  Need to make n
1671              smaller and handle the last digit separately.  */
1672           int last_digit = n % 10;
1673           /* The sign of n%10 is implementation-defined. */
1674           if (last_digit < 0)
1675             last_digit_char = '0' - last_digit;
1676           else
1677             last_digit_char = '0' + last_digit;
1678           /* After n is made smaller, -n will not overflow. */
1679           n /= 10;
1680         }
1681
1682       *p++ = '-';
1683       n = -n;
1684     }
1685
1686   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1687      way printing any N is fully open-coded without a loop or jump.
1688      (Also see description of DIGITS_*.)  */
1689
1690   if      (n < 10)                       DIGITS_1 (1);
1691   else if (n < 100)                      DIGITS_2 (10);
1692   else if (n < 1000)                     DIGITS_3 (100);
1693   else if (n < 10000)                    DIGITS_4 (1000);
1694   else if (n < 100000)                   DIGITS_5 (10000);
1695   else if (n < 1000000)                  DIGITS_6 (100000);
1696   else if (n < 10000000)                 DIGITS_7 (1000000);
1697   else if (n < 100000000)                DIGITS_8 (10000000);
1698   else if (n < 1000000000)               DIGITS_9 (100000000);
1699 #if SIZEOF_WGINT == 4
1700   /* wgint is 32 bits wide: no number has more than 10 digits. */
1701   else                                   DIGITS_10 (1000000000);
1702 #else
1703   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1704      Constants are constructed by compile-time multiplication to avoid
1705      dealing with different notations for 64-bit constants
1706      (nL/nLL/nI64, depending on the compiler and architecture).  */
1707   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1708   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1709   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1710   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1711   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1712   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1713   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1714   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1715   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1716   else                                   DIGITS_19 (1000000000*(W)1000000000);
1717 #endif
1718
1719   if (last_digit_char)
1720     *p++ = last_digit_char;
1721
1722   *p = '\0';
1723 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1724
1725   return p;
1726 }
1727
/* Remove the helper macros defined for number_to_string so that the
   short names (PR, W, DIGITS_*) do not leak into the code below.
   NOTE(review): no #define of SPRINTF_WGINT is visible in this part
   of the file, so its #undef looks like a leftover -- confirm before
   removing it.  */
#undef PR
#undef W
#undef SPRINTF_WGINT
#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1750
1751 #define RING_SIZE 3
1752
1753 /* Print NUMBER to a statically allocated string and return a pointer
1754    to the printed representation.
1755
1756    This function is intended to be used in conjunction with printf.
1757    It is hard to portably print wgint values:
1758     a) you cannot use printf("%ld", number) because wgint can be long
1759        long on 32-bit machines with LFS.
1760     b) you cannot use printf("%lld", number) because NUMBER could be
1761        long on 32-bit machines without LFS, or on 64-bit machines,
1762        which do not require LFS.  Also, Windows doesn't support %lld.
1763     c) you cannot use printf("%j", (int_max_t) number) because not all
1764        versions of printf support "%j", the most notable being the one
1765        on Windows.
1766     d) you cannot #define WGINT_FMT to the appropriate format and use
1767        printf(WGINT_FMT, number) because that would break translations
1768        for user-visible messages, such as printf("Downloaded: %d
1769        bytes\n", number).
1770
1771    What you should use instead is printf("%s", number_to_static_string
1772    (number)).
1773
1774    CAVEAT: since the function returns pointers to static data, you
1775    must be careful to copy its result before calling it again.
1776    However, to make it more useful with printf, the function maintains
1777    an internal ring of static buffers to return.  That way things like
1778    printf("%s %s", number_to_static_string (num1),
1779    number_to_static_string (num2)) work as expected.  Three buffers
1780    are currently used, which means that "%s %s %s" will work, but "%s
1781    %s %s %s" won't.  If you need to print more than three wgints,
1782    bump the RING_SIZE (or rethink your message.)  */
1783
1784 char *
1785 number_to_static_string (wgint number)
1786 {
1787   static char ring[RING_SIZE][24];
1788   static int ringpos;
1789   char *buf = ring[ringpos];
1790   number_to_string (buf, number);
1791   ringpos = (ringpos + 1) % RING_SIZE;
1792   return buf;
1793 }
1794 \f
/* Return the width, in columns, of the terminal attached to stderr,
   or 0 when it cannot be determined.

   With TIOCGWINSZ the width is obtained through ioctl(), and 0 is
   returned without asking when opt.lfilename is set.  On WINDOWS the
   console screen buffer is queried.  On other systems the answer is
   always 0.  */

int
determine_screen_width (void)
{
#ifdef TIOCGWINSZ
  struct winsize ws;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &ws) < 0)
    return 0;                   /* most likely ENOTTY */

  return ws.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO info;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &info))
    return info.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1824 \f
/* Nonzero once the random number generator in use (rand or
   [dl]rand48) has been seeded.  */
static int rnd_seeded;

/* Return a random number between 0 and MAX-1, inclusive.  MAX must be
   positive.

   lrand48 is used where available (HAVE_DRAND48) and rand elsewhere.
   In the latter case, when MAX exceeds RAND_MAX+1 only RAND_MAX+1
   distinct values can be produced.  The generator is seeded
   automatically, from the current time and the process id, the first
   time it is needed.

   DO NOT use this for cryptography.  It is only meant for situations
   where the quality of the random numbers doesn't really matter.  */

int
random_number (int max)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      const long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */
  int sample;

  if (rnd_seeded == 0)
    {
      const unsigned seed = (unsigned) time (NULL) ^ (unsigned) getpid ();
      srand (seed);
      rnd_seeded = 1;
    }
  sample = rand ();

  /* Scale instead of taking rand() % max: this relies on the
     high-order bits, which are more random on systems where rand() is
     a simple congruential generator.  */
  return (int) ((double) max * sample / (RAND_MAX + 1.0));
#endif /* not HAVE_DRAND48 */
}
1871
/* Return a pseudo-random floating point number uniformly distributed
   over [0, 1).  drand48 is used where available; otherwise the value
   is assembled from four random_number draws, each contributing four
   decimal digits.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  double scale = 1.0;
  double sum = 0.0;
  int group;

  /* Each pass appends four more decimal digits: the divisor grows
     through 1e4, 1e8, 1e12 and 1e16.  */
  for (group = 0; group < 4; group++)
    {
      scale *= 10000.0;
      sum += random_number (10000) / scale;
    }
  return sum;
#endif /* not HAVE_DRAND48 */
}
1893 \f
1894 /* Implementation of run_with_timeout, a generic timeout-forcing
1895    routine for systems with Unix-like signal handling.  */
1896
1897 #ifdef USE_SIGNAL_TIMEOUT
# ifdef HAVE_SIGSETJMP
/* Record the jump target with sigsetjmp; the second argument of 1
   asks for the signal mask to be saved along with it.  */
#  define SETJMP(env) sigsetjmp (env, 1)

/* Jump target that the SIGALRM handler returns to.  */
static sigjmp_buf run_with_timeout_env;

/* SIGALRM handler: abandon whatever was running by jumping back to
   the point recorded in run_with_timeout_env.  */
static void
abort_run_with_timeout (int sig)
{
  assert (sig == SIGALRM);
  siglongjmp (run_with_timeout_env, -1);
}
# else /* not HAVE_SIGSETJMP */
#  define SETJMP(env) setjmp (env)

/* Jump target that the SIGALRM handler returns to.  */
static jmp_buf run_with_timeout_env;

/* SIGALRM handler: abandon whatever was running by jumping back to
   the point recorded in run_with_timeout_env.  */
static void
abort_run_with_timeout (int sig)
{
  assert (sig == SIGALRM);

  /* Without siglongjmp nothing restores the set of blocked signals,
     so jumping out of the handler right away would leave SIGALRM
     blocked.  Clear it from the mask by hand before jumping.  */
  sigsetmask (siggetmask () & ~sigmask (SIGALRM));

  longjmp (run_with_timeout_env, -1);
}
# endif /* not HAVE_SIGSETJMP */
1929
/* Schedule SIGALRM for delivery TIMEOUT seconds from now, using
   setitimer where available and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would round
   down to zero is bumped to the smallest interval the interface can
   express (1us for setitimer, 1s for alarm), so that SIGALRM is
   delivered in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval timer;
  long whole_secs = (long) timeout;

  xzero (timer);
  timer.it_value.tv_sec = whole_secs;
  timer.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* An all-zero value would mean "wait forever", so wait for the
     minimum interval instead.  */
  if (!(timer.it_value.tv_sec != 0 || timer.it_value.tv_usec != 0))
    timer.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &timer, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface. */
  int whole_secs = (int) timeout;

  /* alarm(0) means "never deliver the alarm", which is not what
     someone asking for a 0.5s timeout expects; round sub-second
     timeouts up to one second instead.  */
  alarm (whole_secs != 0 ? whole_secs : 1);
#endif /* not ITIMER_REAL */
}
1964
/* Disarm a pending alarm previously scheduled with alarm_set.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* Installing an all-zero timer value turns the timer off.  */
  struct itimerval off;

  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  /* A zero argument cancels any pending alarm.  */
  alarm (0);
#endif /* not ITIMER_REAL */
}
1978
1979 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1980    seconds.  Returns true if the function was interrupted with a
1981    timeout, false otherwise.
1982
1983    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1984    using setitimer() or alarm().  The timeout is enforced by
1985    longjumping out of the SIGALRM handler.  This has several
1986    advantages compared to the traditional approach of relying on
1987    signals causing system calls to exit with EINTR:
1988
1989      * The callback function is *forcibly* interrupted after the
1990        timeout expires, (almost) regardless of what it was doing and
1991        whether it was in a syscall.  For example, a calculation that
1992        takes a long time is interrupted as reliably as an IO
1993        operation.
1994
1995      * It works with both SYSV and BSD signals because it doesn't
1996        depend on the default setting of SA_RESTART.
1997
1998      * It doesn't require special handler setup beyond a simple call
1999        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2000        optional.)
2001
2002    The only downside is that, if FUN allocates internal resources that
2003    are normally freed prior to exit from the functions, they will be
2004    lost in case of timeout.  */
2005
2006 bool
2007 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2008 {
2009   int saved_errno;
2010
2011   if (timeout == 0)
2012     {
2013       fun (arg);
2014       return false;
2015     }
2016
2017   signal (SIGALRM, abort_run_with_timeout);
2018   if (SETJMP (run_with_timeout_env) != 0)
2019     {
2020       /* Longjumped out of FUN with a timeout. */
2021       signal (SIGALRM, SIG_DFL);
2022       return true;
2023     }
2024   alarm_set (timeout);
2025   fun (arg);
2026
2027   /* Preserve errno in case alarm() or signal() modifies it. */
2028   saved_errno = errno;
2029   alarm_cancel ();
2030   signal (SIGALRM, SIG_DFL);
2031   errno = saved_errno;
2032
2033   return false;
2034 }
2035
2036 #else  /* not USE_SIGNAL_TIMEOUT */
2037
2038 #ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based
   timeouts: FUN(ARG) is simply run to completion and TIMEOUT is
   ignored, so the result is always false.  Windows is excluded
   because it has its own thread-based run_with_timeout.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;               /* no way to enforce it here */
  (*fun) (arg);
  return false;
}
2049 #endif /* not WINDOWS */
2050 #endif /* not USE_SIGNAL_TIMEOUT */
2051 \f
2052 #ifndef WINDOWS
2053
/* Suspend execution for SECONDS seconds (fractions are honoured).
   Where nanosleep() is not available the sleep may end early if a
   signal arrives.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is preferred: it is accurate and, more importantly,
     it reports the unslept time, which lets us reliably resume after
     a signal such as SIGWINCH.  (There was an actual Debian bug
     report about --limit-rate malfunctioning while the terminal was
     being resized.)  */
  struct timespec request, leftover;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);
  for (;;)
    {
      if (nanosleep (&request, &leftover) >= 0)
        break;                  /* slept the full period */
      if (errno != EINTR)
        break;                  /* failure other than a signal */
      /* Interrupted by a signal: go back to sleep for the rest.  */
      request = leftover;
    }
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* Some systems' usleep cannot handle values larger than
         1,000,000, so sleep the whole seconds with sleep() and leave
         only the fraction for usleep.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval period;

  period.tv_sec = (long) seconds;
  period.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &period);
  /* A return of -1 with errno EINTR means a signal cut the sleep
     short.  Without knowing how long we actually slept we cannot
     resume; tracking that with gettimeofday is slow and unreliable
     due to clock skew.  */
#endif
}
2099
2100 #endif /* not WINDOWS */
2101
/* Encode the LENGTH octets at DATA to base64, storing the result in
   DEST.  DEST must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes; the output is zero-terminated.

   Returns the length of the base64 text, not counting the
   terminating zero.  A LENGTH of zero (or less) produces the empty
   string and returns 0; DATA is not dereferenced in that case.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  char *p = dest;
  /* Count the octets still to be encoded rather than comparing
     against an end pointer: DATA + LENGTH - 2 would point before the
     buffer for short inputs, which is undefined behavior.  */
  int left;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (left = length; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing octets, if any, and pad the
     quadruplet with '='...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* Nothing left over (or LENGTH was not positive).  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (int) (p - dest);
}
2158
/* Fetch into C the next non-whitespace character of the string at P,
   advancing P past it.  C is \0 once the end of string is reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode BASE64, a null-terminated base64 string, into the memory at
   DEST.  DEST is assumed to be large enough to accommodate the
   decoded data, which is guaranteed to be no more than
   3/4*strlen(base64).  Whitespace in the input is skipped.

   DEST receives binary data and is therefore not NUL-terminated.
   The function returns the number of bytes written to DEST, or -1 if
   the base64 input is malformed.

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* Base64 digit values of the first 128 character codes; -1 marks
     characters outside the base64 alphabet.  Note that this assumes
     ASCII (but so does Wget in other places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
  /* True only for characters of the base64 alphabet proper.  Both
     the padding character '=' and the terminating \0 map to -1 in
     the table, so neither counts as a digit.  */
#define IS_BASE64_DIGIT(c) (IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0)

  const char *in = base64;
  char *out = dest;

  for (;;)
    {
      unsigned char c;
      unsigned long bits;

      /* First character of a quadruplet; this is the only position
         where the input is allowed to end.  */
      NEXT_CHAR (c, in);
      if (c == '\0')
        break;
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* illegal char while decoding base64 */
      bits = BASE64_CHAR_TO_VALUE (c) << 18;

      /* Second character; it completes the first output byte.  */
      NEXT_CHAR (c, in);
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (c) << 12;
      *out++ = bits >> 16;

      /* Third character: a digit, or the first of two padding
         characters.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        {
          NEXT_CHAR (c, in);
          if (c != '=')
            return -1;          /* premature EOF, or second `=' missing */
          continue;
        }
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (c) << 6;
      *out++ = 0xff & bits >> 8;

      /* Fourth character: a digit, or a single padding character.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        continue;
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (c);
      *out++ = 0xff & bits;
    }
#undef IS_BASE64_DIGIT
#undef BASE64_CHAR_TO_VALUE

  return out - (char *) dest;
}

#undef IS_ASCII
#undef NEXT_CHAR
2268 \f
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements of BASE with indices FROM through TO, both
   inclusive.  Each element is SIZE bytes long.  TEMP is scratch
   space that must be able to hold the same index range.  CMPFUN
   follows the qsort comparator convention.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way so that the sum cannot wrap around.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);

      /* Merge the two sorted halves into TEMP.  Taking the left
         element on ties (<= 0) is what makes the sort stable.  */
      i = from;
      j = mid + 1;
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);

      /* Copy the merged run back in one go.  */
      memcpy (ELT (base, from), ELT (temp, from), (to - from + 1) * size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort:
   sorts the NMEMB elements of SIZE bytes each starting at BASE,
   ordering them with CMPFUN.  Elements that compare equal keep their
   relative order.

   Uses mergesort internally, allocating NMEMB * SIZE bytes of
   temporary storage with alloca, so it is only suitable for arrays
   that comfortably fit on the stack.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* Arrays of zero or one element are already sorted.  The check
     must be on NMEMB: with NMEMB == 0 the upper index NMEMB - 1 would
     wrap around to the largest size_t value.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2314 \f
/* Format NUMBER compactly and return the result in a static buffer
   that is overwritten by the next call.

   Magnitudes of ten and above are rounded to an integer.  Smaller
   ones are shown with one significant digit, without trailing zeros
   and with at most three fractional digits: 0.1 prints as "0.1",
   0.035 as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".

   This suits durations: it conveys the order of magnitude without
   clutter -- long-running downloads lose the fractional part, short
   ones still keep one significant digit.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  const char *format;
  double magnitude = number;

  if (number < 0)
    magnitude = -number;

  /* Choose the format from the magnitude alone; the sign is carried
     by NUMBER itself when it is printed.  */
  if (magnitude >= 9.95)
    /* The cut-off is 9.95 because %.1f would print 9.96 as "10.0"
       rather than "10".  OTOH 9.94 still prints as "9.9".  */
    format = "%.0f";
  else if (magnitude >= 0.95)
    format = "%.1f";
  else if (magnitude >= 0.001)
    format = "%.1g";
  else if (magnitude >= 0.0005)
    /* round [0.0005, 0.001) to 0.001 */
    format = "%.3f";
  else
    /* too close to 0: print "0", not "0.000" */
    format = NULL;

  if (format != NULL)
    snprintf (buf, sizeof buf, format, number);
  else
    strcpy (buf, "0");

  return buf;
}
2349
2350 #ifdef TESTING
2351
/* Unit test for subdir_p: run it over a table of directory pairs and
   check each answer against the expected one.  Returns NULL when
   every case passes.  */
const char *
test_subdir_p()
{
  struct {
    char *d1;
    char *d2;
    bool result;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  int n;

  for (n = 0; n < countof(cases); ++n)
    {
      bool got = subdir_p (cases[n].d1, cases[n].d2);

      mu_assert ("test_subdir_p: wrong result",
                 got == cases[n].result);
    }

  return NULL;
}
2376
/* Unit test for dir_matches_p: each table entry pairs a
   NULL-terminated list of directory patterns with a directory name
   and the expected verdict.  Returns NULL when every case passes.  */
const char *
test_dir_matches_p()
{
  struct {
    char *dirlist[3];
    char *dir;
    bool result;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  int n;

  for (n = 0; n < countof(cases); ++n)
    {
      bool got = dir_matches_p (cases[n].dirlist, cases[n].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == cases[n].result);
    }

  return NULL;
}
2413
2414 #endif /* TESTING */
2415