sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #include <errno.h>
  46 #include <fcntl.h>
  47 #include <assert.h>
  48 #include <stdarg.h>
  49 #include <locale.h>
  50
  51 #if HAVE_UTIME
  52 # include <sys/types.h>
  53 # ifdef HAVE_UTIME_H
  54 #  include <utime.h>
  55 # endif
  56
  57 # ifdef HAVE_SYS_UTIME_H
  58 #  include <sys/utime.h>
  59 # endif
  60 #endif
  61
  62 #include <sys/stat.h>
  63
  64 /* For TIOCGWINSZ and friends: */
  65 #ifdef HAVE_SYS_IOCTL_H
  66 # include <sys/ioctl.h>
  67 #endif
  68 #ifdef HAVE_TERMIOS_H
  69 # include <termios.h>
  70 #endif
  71
  72 /* Needed for Unix version of run_with_timeout. */
  73 #include <signal.h>
  74 #include <setjmp.h>
  75
  76 #ifndef HAVE_SIGSETJMP
  77 /* If sigsetjmp is a macro, configure won't pick it up. */
  78 # ifdef sigsetjmp
  79 #  define HAVE_SIGSETJMP
  80 # endif
  81 #endif
  82
  83 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  84 # define USE_SIGNAL_TIMEOUT
  85 #endif
  86
  87 #include "utils.h"
  88 #include "hash.h"
  89
  90 #ifdef __VMS
  91 #include "vms.h"
  92 #endif /* def __VMS */
  93
  94 #ifdef TESTING
  95 #include "test.h"
  96 #endif
  97
  98 static void
  99 memfatal (const char *context, long attempted_size)
 100 {
 101   /* Make sure we don't try to store part of the log line, and thus
 102      call malloc.  */
 103   log_set_save_context (false);
 104
 105   /* We have different log outputs in different situations:
 106      1) output without bytes information
 107      2) output with bytes information  */
 108   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 109     {
 110       logprintf (LOG_ALWAYS,
 111                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 112                  exec_name, context);
 113     }
 114   else
 115     {
 116       logprintf (LOG_ALWAYS,
 117                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 118                  exec_name, context, attempted_size);
 119     }
 120
 121   exit (1);
 122 }
 123
 124 /* Character property table for (re-)escaping VMS ODS5 extended file
 125    names.  Note that this table ignores Unicode.
 126
 127    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 128
 129    ODS5 Invalid characters:
 130       C0 control codes (0x00 to 0x1F inclusive)
 131       Asterisk (*)
 132       Question mark (?)
 133
 134    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 135       Double quotation marks (")
 136       Backslash (\)
 137       Colon (:)
 138       Left angle bracket (<)
 139       Right angle bracket (>)
 140       Slash (/)
 141       Vertical bar (|)
 142
 143    Characters escaped by "^":
 144       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 145        @  [  \  ]  ^  `  {  |  }  ~
 146
 147    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 148    case.  Note that un-escaped < and > can also confuse a directory
 149    spec.
 150
 151    Characters put out as ^xx:
 152       7F (DEL)
 153       80-9F (C1 control characters)
 154       A0 (nonbreaking space)
 155       FF (Latin small letter y diaeresis)
 156
 157    Other cases:
 158       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 159
 160     Property table values:
 161       Normal escape:    1
 162       Space:            2
 163       Dot:              4
 164       Hex-hex escape:   8
 165       ODS2 normal:     16
 166       ODS2 lower case: 32
 167       Hex digit:       64
 168 */
 169
 170 unsigned char char_prop[ 256] = {
 171
 172 /* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
 173     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 174
 175 /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
 176     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 177
 178 /*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
 179     2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,
 180
 181 /*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
 182    80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,
 183
 184 /*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
 185     1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,
 186
 187 /*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
 188    16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,
 189
 190 /*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
 191     1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,
 192
 193 /*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
 194    32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,
 195
 196     8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
 197     8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
 198     8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 199     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 200     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 201     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 202     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 203     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
 204 };
 205
 206 /* Utility function: like xstrdup(), but also lowercases S.  */
 207
 208 char *
 209 xstrdup_lower (const char *s)
 210 {
 211   char *copy = xstrdup (s);
 212   char *p = copy;
 213   for (; *p; p++)
 214     *p = c_tolower (*p);
 215   return copy;
 216 }
 217
 218 /* Copy the string formed by two pointers (one on the beginning, other
 219    on the char after the last char) to a new, malloc-ed location.
 220    0-terminate it.  */
 221 char *
 222 strdupdelim (const char *beg, const char *end)
 223 {
 224   char *res = xmalloc (end - beg + 1);
 225   memcpy (res, beg, end - beg);
 226   res[end - beg] = '\0';
 227   return res;
 228 }
 229
 230 /* Parse a string containing comma-separated elements, and return a
 231    vector of char pointers with the elements.  Spaces following the
 232    commas are ignored.  */
 233 char **
 234 sepstring (const char *s)
 235 {
 236   char **res;
 237   const char *p;
 238   int i = 0;
 239
 240   if (!s || !*s)
 241     return NULL;
 242   res = NULL;
 243   p = s;
 244   while (*s)
 245     {
 246       if (*s == ',')
 247         {
 248           res = xrealloc (res, (i + 2) * sizeof (char *));
 249           res[i] = strdupdelim (p, s);
 250           res[++i] = NULL;
 251           ++s;
 252           /* Skip the blanks following the ','.  */
 253           while (c_isspace (*s))
 254             ++s;
 255           p = s;
 256         }
 257       else
 258         ++s;
 259     }
 260   res = xrealloc (res, (i + 2) * sizeof (char *));
 261   res[i] = strdupdelim (p, s);
 262   res[i + 1] = NULL;
 263   return res;
 264 }
 265 \f
 266 /* Like sprintf, but prints into a string of sufficient size freshly
 267    allocated with malloc, which is returned.  If unable to print due
 268    to invalid format, returns NULL.  Inability to allocate needed
 269    memory results in abort, as with xmalloc.  This is in spirit
 270    similar to the GNU/BSD extension asprintf, but somewhat easier to
 271    use.
 272
 273    Internally the function either calls vasprintf or loops around
 274    vsnprintf until the correct size is found.  Since Wget also ships a
 275    fallback implementation of vsnprintf, this should be portable.  */
 276
/* This constant limits the memory allocated for the text buffer.  It
   applies when vasprintf is not available on the system and vsnprintf
   returns -1 when a long line is truncated (as in old versions of
   glibc and on other systems without C99 support).  */
 281
 282 #define FMT_MAX_LENGTH 1048576
 283
 284 char *
 285 aprintf (const char *fmt, ...)
 286 {
 287 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 288   /* Use vasprintf. */
 289   int ret;
 290   va_list args;
 291   char *str;
 292   va_start (args, fmt);
 293   ret = vasprintf (&str, fmt, args);
 294   va_end (args);
 295   if (ret < 0 && errno == ENOMEM)
 296     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 297                                                       with xmalloc/xrealloc */
 298   else if (ret < 0)
 299     return NULL;
 300   return str;
 301 #else  /* not HAVE_VASPRINTF */
 302
 303   /* vasprintf is unavailable.  snprintf into a small buffer and
 304      resize it as necessary. */
 305   int size = 32;
 306   char *str = xmalloc (size);
 307
 308   /* #### This code will infloop and eventually abort in xrealloc if
 309      passed a FMT that causes snprintf to consistently return -1.  */
 310
 311   while (1)
 312     {
 313       int n;
 314       va_list args;
 315
 316       va_start (args, fmt);
 317       n = vsnprintf (str, size, fmt, args);
 318       va_end (args);
 319
 320       /* If the printing worked, return the string. */
 321       if (n > -1 && n < size)
 322         return str;
 323
 324       /* Else try again with a larger buffer. */
 325       if (n > -1)               /* C99 */
 326         size = n + 1;           /* precisely what is needed */
 327       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 328         {                               /* maybe we have some wrong
 329                                            format string? */
 330           logprintf (LOG_ALWAYS,
 331                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 332                        "aborting.\n"),
 333                      exec_name, size);  /* printout a log message */
 334           abort ();                     /* and abort... */
 335         }
 336       else
 337         {
 338           /* else, we continue to grow our
 339            * buffer: Twice the old size. */
 340           size <<= 1;
 341         }
 342       str = xrealloc (str, size);
 343     }
 344 #endif /* not HAVE_VASPRINTF */
 345 }
 346
 347 /* Concatenate the NULL-terminated list of string arguments into
 348    freshly allocated space.  */
 349
 350 char *
 351 concat_strings (const char *str0, ...)
 352 {
 353   va_list args;
 354   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 355   char *ret, *p;
 356
 357   const char *next_str;
 358   int total_length = 0;
 359   size_t argcount;
 360
 361   /* Calculate the length of and allocate the resulting string. */
 362
 363   argcount = 0;
 364   va_start (args, str0);
 365   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 366     {
 367       int len = strlen (next_str);
 368       if (argcount < countof (saved_lengths))
 369         saved_lengths[argcount++] = len;
 370       total_length += len;
 371     }
 372   va_end (args);
 373   p = ret = xmalloc (total_length + 1);
 374
 375   /* Copy the strings into the allocated space. */
 376
 377   argcount = 0;
 378   va_start (args, str0);
 379   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 380     {
 381       int len;
 382       if (argcount < countof (saved_lengths))
 383         len = saved_lengths[argcount++];
 384       else
 385         len = strlen (next_str);
 386       memcpy (p, next_str, len);
 387       p += len;
 388     }
 389   va_end (args);
 390   *p = '\0';
 391
 392   return ret;
 393 }
 394 \f
 395 /* Format the provided time according to the specified format.  The
 396    format is a string with format elements supported by strftime.  */
 397
 398 static char *
 399 fmttime (time_t t, const char *fmt)
 400 {
 401   static char output[32];
 402   struct tm *tm = localtime(&t);
 403   if (!tm)
 404     abort ();
 405   if (!strftime(output, sizeof(output), fmt, tm))
 406     abort ();
 407   return output;
 408 }
 409
 410 /* Return pointer to a static char[] buffer in which zero-terminated
 411    string-representation of TM (in form hh:mm:ss) is printed.
 412
 413    If TM is NULL, the current time will be used.  */
 414
 415 char *
 416 time_str (time_t t)
 417 {
 418   return fmttime(t, "%H:%M:%S");
 419 }
 420
 421 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 422
 423 char *
 424 datetime_str (time_t t)
 425 {
 426   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 427 }
 428 \f
 429 /* The Windows versions of the following two functions are defined in
 430    mswindows.c. On MSDOS this function should never be called. */
 431
 432 #ifdef __VMS
 433
 434 void
 435 fork_to_background (void)
 436 {
 437   return;
 438 }
 439
 440 #else /* def __VMS */
 441
 442 #if !defined(WINDOWS) && !defined(MSDOS)
 443 void
 444 fork_to_background (void)
 445 {
 446   pid_t pid;
 447   /* Whether we arrange our own version of opt.lfilename here.  */
 448   bool logfile_changed = false;
 449
 450   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 451     {
 452       /* We must create the file immediately to avoid either a race
 453          condition (which arises from using unique_name and failing to
 454          use fopen_excl) or lying to the user about the log file name
 455          (which arises from using unique_name, printing the name, and
 456          using fopen_excl later on.)  */
 457       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 458       if (new_log_fp)
 459         {
 460           logfile_changed = true;
 461           fclose (new_log_fp);
 462         }
 463     }
 464   pid = fork ();
 465   if (pid < 0)
 466     {
 467       /* parent, error */
 468       perror ("fork");
 469       exit (1);
 470     }
 471   else if (pid != 0)
 472     {
 473       /* parent, no error */
 474       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 475       if (logfile_changed)
 476         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 477       exit (0);                 /* #### should we use _exit()? */
 478     }
 479
 480   /* child: give up the privileges and keep running. */
 481   setsid ();
 482   freopen ("/dev/null", "r", stdin);
 483   freopen ("/dev/null", "w", stdout);
 484   freopen ("/dev/null", "w", stderr);
 485 }
 486 #endif /* !WINDOWS && !MSDOS */
 487
 488 #endif /* def __VMS [else] */
 489
 490 \f
 491 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 492    specified with TM.  The atime ("access time") is set to the current
 493    time.  */
 494
 495 void
 496 touch (const char *file, time_t tm)
 497 {
 498 #if HAVE_UTIME
 499 # ifdef HAVE_STRUCT_UTIMBUF
 500   struct utimbuf times;
 501 # else
 502   struct {
 503     time_t actime;
 504     time_t modtime;
 505   } times;
 506 # endif
 507   times.modtime = tm;
 508   times.actime = time (NULL);
 509   if (utime (file, &times) == -1)
 510     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 511 #else
 512   struct timespec timespecs[2];
 513   int fd;
 514
 515   fd = open (file, O_WRONLY);
 516   if (fd < 0)
 517     {
 518       logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
 519       return;
 520     }
 521
 522   timespecs[0].tv_sec = time (NULL);
 523   timespecs[0].tv_nsec = 0L;
 524   timespecs[1].tv_sec = tm;
 525   timespecs[1].tv_nsec = 0L;
 526
 527   if (futimens (fd, timespecs) == -1)
 528     logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
 529
 530   close (fd);
 531 #endif
 532 }
 533
 534 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 535    nothing under MS-Windows.  */
 536 int
 537 remove_link (const char *file)
 538 {
 539   int err = 0;
 540   struct_stat st;
 541
 542   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 543     {
 544       DEBUGP (("Unlinking %s (symlink).\n", file));
 545       err = unlink (file);
 546       if (err != 0)
 547         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 548                    quote (file), strerror (errno));
 549     }
 550   return err;
 551 }
 552
 553 /* Does FILENAME exist?  This is quite a lousy implementation, since
 554    it supplies no error codes -- only a yes-or-no answer.  Thus it
 555    will return that a file does not exist if, e.g., the directory is
 556    unreadable.  I don't mind it too much currently, though.  The
 557    proper way should, of course, be to have a third, error state,
 558    other than true/false, but that would introduce uncalled-for
 559    additional complexity to the callers.  */
 560 bool
 561 file_exists_p (const char *filename)
 562 {
 563 #ifdef HAVE_ACCESS
 564   return access (filename, F_OK) >= 0;
 565 #else
 566   struct_stat buf;
 567   return stat (filename, &buf) >= 0;
 568 #endif
 569 }
 570
 571 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 572    Returns 0 on error.  */
 573 bool
 574 file_non_directory_p (const char *path)
 575 {
 576   struct_stat buf;
 577   /* Use lstat() rather than stat() so that symbolic links pointing to
 578      directories can be identified correctly.  */
 579   if (lstat (path, &buf) != 0)
 580     return false;
 581   return S_ISDIR (buf.st_mode) ? false : true;
 582 }
 583
 584 /* Return the size of file named by FILENAME, or -1 if it cannot be
 585    opened or seeked into. */
 586 wgint
 587 file_size (const char *filename)
 588 {
 589 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 590   wgint size;
 591   /* We use fseek rather than stat to determine the file size because
 592      that way we can also verify that the file is readable without
 593      explicitly checking for permissions.  Inspired by the POST patch
 594      by Arnaud Wylie.  */
 595   FILE *fp = fopen (filename, "rb");
 596   if (!fp)
 597     return -1;
 598   fseeko (fp, 0, SEEK_END);
 599   size = ftello (fp);
 600   fclose (fp);
 601   return size;
 602 #else
 603   struct_stat st;
 604   if (stat (filename, &st) < 0)
 605     return -1;
 606   return st.st_size;
 607 #endif
 608 }
 609
 610 /* 2005-02-19 SMS.
 611    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 612    original name.  With the VMS file systems' versioning, everything
 613    should be fine, and appending ".NN" just causes trouble.
 614 */
 615
 616 #ifdef UNIQ_SEP
 617
 618 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 619    doesn't exist is found.  Return a freshly allocated copy of the
 620    unused file name.  */
 621
 622 static char *
 623 unique_name_1 (const char *prefix)
 624 {
 625   int count = 1;
 626   int plen = strlen (prefix);
 627   char *template = (char *)alloca (plen + 1 + 24);
 628   char *template_tail = template + plen;
 629
 630   memcpy (template, prefix, plen);
 631   *template_tail++ = UNIQ_SEP;
 632
 633   do
 634     number_to_string (template_tail, count++);
 635   while (file_exists_p (template));
 636
 637   return xstrdup (template);
 638 }
 639
 640 /* Return a unique file name, based on FILE.
 641
 642    More precisely, if FILE doesn't exist, it is returned unmodified.
 643    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 644    file name that doesn't exist is returned.
 645
 646    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 647
 648    The resulting file is not created, only verified that it didn't
 649    exist at the point in time when the function was called.
 650    Therefore, where security matters, don't rely that the file created
 651    by this function exists until you open it with O_EXCL or
 652    equivalent.
 653
 654    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 655    string.  Otherwise, it may return FILE if the file doesn't exist
 656    (and therefore doesn't need changing).  */
 657
 658 char *
 659 unique_name (const char *file, bool allow_passthrough)
 660 {
 661   /* If the FILE itself doesn't exist, return it without
 662      modification. */
 663   if (!file_exists_p (file))
 664     return allow_passthrough ? (char *)file : xstrdup (file);
 665
 666   /* Otherwise, find a numeric suffix that results in unused file name
 667      and return it.  */
 668   return unique_name_1 (file);
 669 }
 670
 671 #else /* def UNIQ_SEP */
 672
 673 /* Dummy unique_name() for VMS.  Return the original name as easily as
 674    possible.
 675 */
 676 char *
 677 unique_name (const char *file, bool allow_passthrough)
 678 {
 679   /* Return the FILE itself, without modification, irregardful. */
 680   return allow_passthrough ? (char *)file : xstrdup (file);
 681 }
 682
 683 #endif /* def UNIQ_SEP [else] */
 684
 685 /* Create a file based on NAME, except without overwriting an existing
 686    file with that name.  Providing O_EXCL is correctly implemented,
 687    this function does not have the race condition associated with
 688    opening the file returned by unique_name.  */
 689
 690 FILE *
 691 unique_create (const char *name, bool binary, char **opened_name)
 692 {
 693   /* unique file name, based on NAME */
 694   char *uname = unique_name (name, false);
 695   FILE *fp;
 696   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 697     {
 698       xfree (uname);
 699       uname = unique_name (name, false);
 700     }
 701   if (opened_name && fp != NULL)
 702     {
 703       if (fp)
 704         *opened_name = uname;
 705       else
 706         {
 707           *opened_name = NULL;
 708           xfree (uname);
 709         }
 710     }
 711   else
 712     xfree (uname);
 713   return fp;
 714 }
 715
 716 /* Open the file for writing, with the addition that the file is
 717    opened "exclusively".  This means that, if the file already exists,
 718    this function will *fail* and errno will be set to EEXIST.  If
 719    BINARY is set, the file will be opened in binary mode, equivalent
 720    to fopen's "wb".
 721
 722    If opening the file fails for any reason, including the file having
 723    previously existed, this function returns NULL and sets errno
 724    appropriately.  */
 725
 726 FILE *
 727 fopen_excl (const char *fname, int binary)
 728 {
 729   int fd;
 730 #ifdef O_EXCL
 731
 732 /* 2005-04-14 SMS.
 733    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 734    It also has file versions which obviate all the O_EXCL effort.
 735    O_TRUNC (something of a misnomer) requests a new version.
 736 */
 737 # ifdef __VMS
 738 /* Common open() optional arguments:
 739    sequential access only, access callback function.
 740 */
 741 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 742
 743   int open_id;
 744   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 745
 746   if (binary > 1)
 747     {
 748       open_id = 11;
 749       fd = open( fname,                 /* File name. */
 750        flags,                           /* Flags. */
 751        0777,                            /* Mode for default protection. */
 752        "ctx=bin,stm",                   /* Binary, stream access. */
 753        "rfm=stmlf",                     /* Stream_LF. */
 754        OPEN_OPT_ARGS);                  /* Access callback. */
 755     }
 756   else if (binary)
 757     {
 758       open_id = 12;
 759       fd = open( fname,                 /* File name. */
 760        flags,                           /* Flags. */
 761        0777,                            /* Mode for default protection. */
 762        "ctx=bin,stm",                   /* Binary, stream access. */
 763        "rfm=fix",                       /* Fixed-length, */
 764        "mrs=512",                       /* 512-byte records. */
 765        OPEN_OPT_ARGS);                  /* Access callback. */
 766     }
 767   else
 768     {
 769       open_id = 13;
 770       fd = open( fname,                 /* File name. */
 771        flags,                           /* Flags. */
 772        0777,                            /* Mode for default protection. */
 773        "rfm=stmlf",                     /* Stream_LF. */
 774        OPEN_OPT_ARGS);                  /* Access callback. */
 775     }
 776 # else /* def __VMS */
 777   int flags = O_WRONLY | O_CREAT | O_EXCL;
 778 # ifdef O_BINARY
 779   if (binary)
 780     flags |= O_BINARY;
 781 # endif
 782   fd = open (fname, flags, 0666);
 783 # endif /* def __VMS [else] */
 784
 785   if (fd < 0)
 786     return NULL;
 787   return fdopen (fd, binary ? "wb" : "w");
 788 #else  /* not O_EXCL */
 789   /* Manually check whether the file exists.  This is prone to race
 790      conditions, but systems without O_EXCL haven't deserved
 791      better.  */
 792   if (file_exists_p (fname))
 793     {
 794       errno = EEXIST;
 795       return NULL;
 796     }
 797   return fopen (fname, binary ? "wb" : "w");
 798 #endif /* not O_EXCL */
 799 }
 800 \f
 801 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 802    are missing, create them first.  In case any mkdir() call fails,
 803    return its error status.  Returns 0 on successful completion.
 804
 805    The behaviour of this function should be identical to the behaviour
 806    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 807 int
 808 make_directory (const char *directory)
 809 {
 810   int i, ret, quit = 0;
 811   char *dir;
 812
 813   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 814      function is unsafe if called with a read-only char *argument.  */
 815   STRDUP_ALLOCA (dir, directory);
 816
 817   /* If the first character of dir is '/', skip it (and thus enable
 818      creation of absolute-pathname directories.  */
 819   for (i = (*dir == '/'); 1; ++i)
 820     {
 821       for (; dir[i] && dir[i] != '/'; i++)
 822         ;
 823       if (!dir[i])
 824         quit = 1;
 825       dir[i] = '\0';
 826       /* Check whether the directory already exists.  Allow creation of
 827          of intermediate directories to fail, as the initial path components
 828          are not necessarily directories!  */
 829       if (!file_exists_p (dir))
 830         ret = mkdir (dir, 0777);
 831       else
 832         ret = 0;
 833       if (quit)
 834         break;
 835       else
 836         dir[i] = '/';
 837     }
 838   return ret;
 839 }
 840
 841 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 842    should be a file name.
 843
 844    file_merge("/foo/bar", "baz")  => "/foo/baz"
 845    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 846    file_merge("foo", "bar")       => "bar"
 847
 848    In other words, it's a simpler and gentler version of uri_merge.  */
 849
 850 char *
 851 file_merge (const char *base, const char *file)
 852 {
 853   char *result;
 854   const char *cut = (const char *)strrchr (base, '/');
 855
 856   if (!cut)
 857     return xstrdup (file);
 858
 859   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 860   memcpy (result, base, cut - base);
 861   result[cut - base] = '/';
 862   strcpy (result + (cut - base) + 1, file);
 863
 864   return result;
 865 }
 866 \f
 867 /* Like fnmatch, but performs a case-insensitive match.  */
 868
 869 int
 870 fnmatch_nocase (const char *pattern, const char *string, int flags)
 871 {
 872 #ifdef FNM_CASEFOLD
 873   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 874      also present on *BSD platforms, and possibly elsewhere.  */
 875   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 876 #else
 877   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 878   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 879   char *strcopy = (char *) alloca (strlen (string) + 1);
 880   char *p;
 881   for (p = patcopy; *pattern; pattern++, p++)
 882     *p = c_tolower (*pattern);
 883   *p = '\0';
 884   for (p = strcopy; *string; string++, p++)
 885     *p = c_tolower (*string);
 886   *p = '\0';
 887   return fnmatch (patcopy, strcopy, flags);
 888 #endif
 889 }
 890
 891 static bool in_acclist (const char *const *, const char *, bool);
 892
 893 /* Determine whether a file is acceptable to be followed, according to
 894    lists of patterns to accept/reject.  */
 895 bool
 896 acceptable (const char *s)
 897 {
 898   int l = strlen (s);
 899
 900   if (opt.output_document && strcmp (s, opt.output_document) == 0)
 901     return true;
 902
 903   while (l && s[l] != '/')
 904     --l;
 905   if (s[l] == '/')
 906     s += (l + 1);
 907   if (opt.accepts)
 908     {
 909       if (opt.rejects)
 910         return (in_acclist ((const char *const *)opt.accepts, s, true)
 911                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 912       else
 913         return in_acclist ((const char *const *)opt.accepts, s, true);
 914     }
 915   else if (opt.rejects)
 916     return !in_acclist ((const char *const *)opt.rejects, s, true);
 917   return true;
 918 }
 919
 920 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 921    will return true if and only if D2 begins with `/something/' or is exactly
 922    '/something'.  */
 923 bool
 924 subdir_p (const char *d1, const char *d2)
 925 {
 926   if (*d1 == '\0')
 927     return true;
 928   if (!opt.ignore_case)
 929     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 930       ;
 931   else
 932     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 933       ;
 934
 935   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 936 }
 937
 938 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 939    first element that matches DIR, through wildcards or front comparison (as
 940    appropriate).  */
 941 static bool
 942 dir_matches_p (char **dirlist, const char *dir)
 943 {
 944   char **x;
 945   int (*matcher) (const char *, const char *, int)
 946     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 947
 948   for (x = dirlist; *x; x++)
 949     {
 950       /* Remove leading '/' */
 951       char *p = *x + (**x == '/');
 952       if (has_wildcards_p (p))
 953         {
 954           if (matcher (p, dir, FNM_PATHNAME) == 0)
 955             break;
 956         }
 957       else
 958         {
 959           if (subdir_p (p, dir))
 960             break;
 961         }
 962     }
 963
 964   return *x ? true : false;
 965 }
 966
 967 /* Returns whether DIRECTORY is acceptable for download, wrt the
 968    include/exclude lists.
 969
 970    The leading `/' is ignored in paths; relative and absolute paths
 971    may be freely intermixed.  */
 972
 973 bool
 974 accdir (const char *directory)
 975 {
 976   /* Remove starting '/'.  */
 977   if (*directory == '/')
 978     ++directory;
 979   if (opt.includes)
 980     {
 981       if (!dir_matches_p (opt.includes, directory))
 982         return false;
 983     }
 984   if (opt.excludes)
 985     {
 986       if (dir_matches_p (opt.excludes, directory))
 987         return false;
 988     }
 989   return true;
 990 }
 991
 992 /* Return true if STRING ends with TAIL.  For instance:
 993
 994    match_tail ("abc", "bc", false)  -> 1
 995    match_tail ("abc", "ab", false)  -> 0
 996    match_tail ("abc", "abc", false) -> 1
 997
 998    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 999
1000 bool
1001 match_tail (const char *string, const char *tail, bool fold_case)
1002 {
1003   int i, j;
1004
1005   /* We want this to be fast, so we code two loops, one with
1006      case-folding, one without. */
1007
1008   if (!fold_case)
1009     {
1010       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
1011         if (string[i] != tail[j])
1012           break;
1013     }
1014   else
1015     {
1016       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
1017         if (c_tolower (string[i]) != c_tolower (tail[j]))
1018           break;
1019     }
1020
1021   /* If the tail was exhausted, the match was succesful.  */
1022   if (j == -1)
1023     return true;
1024   else
1025     return false;
1026 }
1027
1028 /* Checks whether string S matches each element of ACCEPTS.  A list
1029    element are matched either with fnmatch() or match_tail(),
1030    according to whether the element contains wildcards or not.
1031
1032    If the BACKWARD is false, don't do backward comparison -- just compare
1033    them normally.  */
1034 static bool
1035 in_acclist (const char *const *accepts, const char *s, bool backward)
1036 {
1037   for (; *accepts; accepts++)
1038     {
1039       if (has_wildcards_p (*accepts))
1040         {
1041           int res = opt.ignore_case
1042             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1043           /* fnmatch returns 0 if the pattern *does* match the string.  */
1044           if (res == 0)
1045             return true;
1046         }
1047       else
1048         {
1049           if (backward)
1050             {
1051               if (match_tail (s, *accepts, opt.ignore_case))
1052                 return true;
1053             }
1054           else
1055             {
1056               int cmp = opt.ignore_case
1057                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1058               if (cmp == 0)
1059                 return true;
1060             }
1061         }
1062     }
1063   return false;
1064 }
1065
/* Return a pointer to the suffix (file extension) of STR, i.e. to the
   character after the last '.', or NULL when there is no '.' or when
   a '/' follows the last '.'.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  const char *last_dot = strrchr (str, '.');
  const char *last_slash = strrchr (str, '/');

  if (last_dot == NULL)
    return NULL;
  /* A dot located in a directory component does not start a suffix.  */
  if (last_slash != NULL && last_slash > last_dot)
    return NULL;
  return (char *) last_dot + 1;
}
1084
/* Return true if S contains at least one of the globbing characters
   `*', `?', `[' or `]'.  */

bool
has_wildcards_p (const char *s)
{
  return strpbrk (s, "*?[]") != NULL;
}
1096
/* Return true if the suffix of FNAME (as found by suffix()) looks
   like an HTML one.  The following suffixes, compared without regard
   to case, are accepted:

     html
     htm
     ?html (`?' stands for any single character)

   #### CAVEAT.  The suffix alone is not a reliable indication that
   FNAME really contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;
  if (strcasecmp (ext, "html") == 0 || strcasecmp (ext, "htm") == 0)
    return true;
  /* One arbitrary character followed by "html".  */
  return ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0;
}
1122
/* Read one line from FP into freshly allocated storage and return it.
   The storage comes from xmalloc()/xrealloc() and the caller should
   release it when it is no longer needed.

   The line length is limited only by available memory.  The trailing
   newline, if one was read, is kept, and the result is always
   zero-terminated.

   NULL is returned when nothing was read before end-of-file, and also
   when the error indicator of FP is set; use ferror() to tell the two
   cases apart.  */

char *
read_whole_line (FILE *fp)
{
  int used = 0;                 /* characters stored so far */
  int capacity = 82;            /* current size of BUF */
  char *buf = xmalloc (capacity);

  for (;;)
    {
      if (fgets (buf + used, capacity - used, fp) == NULL)
        break;

      used += strlen (buf + used);
      if (used == 0)
        /* Happens for instance with a binary file in which a line
           starts with \0; just try again.  */
        continue;

      if (buf[used - 1] == '\n')
        break;

      /* fgets() either reads the whole line or fills the space it was
         given, so with no newline seen the buffer is simply
         doubled.  */
      capacity *= 2;
      buf = xrealloc (buf, capacity);
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buf);
      return NULL;
    }

  /* Give back the unused part of the buffer.  The `+ 1' accounts for
     the terminating \0, which fgets() has already stored.  */
  if (used + 1 < capacity)
    buf = xrealloc (buf, used + 1);
  return buf;
}
1172 \f
1173 /* Read FILE into memory.  A pointer to `struct file_memory' are
1174    returned; use struct element `content' to access file contents, and
1175    the element `length' to know the file length.  `content' is *not*
1176    zero-terminated, and you should *not* read or write beyond the [0,
1177    length) range of characters.
1178
1179    After you are done with the file contents, call wget_read_file_free to
1180    release the memory.
1181
1182    Depending on the operating system and the type of file that is
1183    being read, wget_read_file() either mmap's the file into memory, or
1184    reads the file into the core using read().
1185
1186    If file is named "-", fileno(stdin) is used for reading instead.
1187    If you want to read from a real file named "-", use "./-" instead.  */
1188
1189 struct file_memory *
1190 wget_read_file (const char *file)
1191 {
1192   int fd;
1193   struct file_memory *fm;
1194   long size;
1195   bool inhibit_close = false;
1196
1197   /* Some magic in the finest tradition of Perl and its kin: if FILE
1198      is "-", just use stdin.  */
1199   if (HYPHENP (file))
1200     {
1201       fd = fileno (stdin);
1202       inhibit_close = true;
1203       /* Note that we don't inhibit mmap() in this case.  If stdin is
1204          redirected from a regular file, mmap() will still work.  */
1205     }
1206   else
1207     fd = open (file, O_RDONLY);
1208   if (fd < 0)
1209     return NULL;
1210   fm = xnew (struct file_memory);
1211
1212 #ifdef HAVE_MMAP
1213   {
1214     struct_fstat buf;
1215     if (fstat (fd, &buf) < 0)
1216       goto mmap_lose;
1217     fm->length = buf.st_size;
1218     /* NOTE: As far as I know, the callers of this function never
1219        modify the file text.  Relying on this would enable us to
1220        specify PROT_READ and MAP_SHARED for a marginal gain in
1221        efficiency, but at some cost to generality.  */
1222     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1223                         MAP_PRIVATE, fd, 0);
1224     if (fm->content == (char *)MAP_FAILED)
1225       goto mmap_lose;
1226     if (!inhibit_close)
1227       close (fd);
1228
1229     fm->mmap_p = 1;
1230     return fm;
1231   }
1232
1233  mmap_lose:
1234   /* The most common reason why mmap() fails is that FD does not point
1235      to a plain file.  However, it's also possible that mmap() doesn't
1236      work for a particular type of file.  Therefore, whenever mmap()
1237      fails, we just fall back to the regular method.  */
1238 #endif /* HAVE_MMAP */
1239
1240   fm->length = 0;
1241   size = 512;                   /* number of bytes fm->contents can
1242                                    hold at any given time. */
1243   fm->content = xmalloc (size);
1244   while (1)
1245     {
1246       wgint nread;
1247       if (fm->length > size / 2)
1248         {
1249           /* #### I'm not sure whether the whole exponential-growth
1250              thing makes sense with kernel read.  On Linux at least,
1251              read() refuses to read more than 4K from a file at a
1252              single chunk anyway.  But other Unixes might optimize it
1253              better, and it doesn't *hurt* anything, so I'm leaving
1254              it.  */
1255
1256           /* Normally, we grow SIZE exponentially to make the number
1257              of calls to read() and realloc() logarithmic in relation
1258              to file size.  However, read() can read an amount of data
1259              smaller than requested, and it would be unreasonable to
1260              double SIZE every time *something* was read.  Therefore,
1261              we double SIZE only when the length exceeds half of the
1262              entire allocated size.  */
1263           size <<= 1;
1264           fm->content = xrealloc (fm->content, size);
1265         }
1266       nread = read (fd, fm->content + fm->length, size - fm->length);
1267       if (nread > 0)
1268         /* Successful read. */
1269         fm->length += nread;
1270       else if (nread < 0)
1271         /* Error. */
1272         goto lose;
1273       else
1274         /* EOF */
1275         break;
1276     }
1277   if (!inhibit_close)
1278     close (fd);
1279   if (size > fm->length && fm->length != 0)
1280     /* Due to exponential growth of fm->content, the allocated region
1281        might be much larger than what is actually needed.  */
1282     fm->content = xrealloc (fm->content, fm->length);
1283   fm->mmap_p = 0;
1284   return fm;
1285
1286  lose:
1287   if (!inhibit_close)
1288     close (fd);
1289   xfree (fm->content);
1290   xfree (fm);
1291   return NULL;
1292 }
1293
1294 /* Release the resources held by FM.  Specifically, this calls
1295    munmap() or xfree() on fm->content, depending whether mmap or
1296    malloc/read were used to read in the file.  It also frees the
1297    memory needed to hold the FM structure itself.  */
1298
1299 void
1300 wget_read_file_free (struct file_memory *fm)
1301 {
1302 #ifdef HAVE_MMAP
1303   if (fm->mmap_p)
1304     {
1305       munmap (fm->content, fm->length);
1306     }
1307   else
1308 #endif
1309     {
1310       xfree (fm->content);
1311     }
1312   xfree (fm);
1313 }
1314 \f
/* Free every string in the NULL-terminated vector VEC and then VEC
   itself.  A NULL VEC is ignored.  */
void
free_vec (char **vec)
{
  char **item;

  if (vec == NULL)
    return;

  for (item = vec; *item != NULL; item++)
    xfree (*item);
  xfree (vec);
}
1328
/* Append vector V2 to vector V1 and return the merged vector.  Both
   vectors are NULL-terminated.  The function reallocates V1 and frees
   V2 (only the vector, not the strings, which now belong to the
   result), so neither pointer may be used after the call.

   If V1 is NULL, V2 is returned; if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The element size is taken from the vector itself, so it is
     the size of a `char *' element, not of a `char **'.  */
  v1 = xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy the elements of v2, including its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1359
/* Append a copy of STR, made with xstrdup(), to the NULL-terminated
   vector VEC and return the new value of the vector.  VEC may be
   NULL, in which case a new vector is created.  */

char **
vec_append (char **vec, const char *str)
{
  int old_count = 0;            /* elements already in the vector */

  if (vec != NULL)
    while (vec[old_count] != NULL)
      old_count++;

  /* Make room for the new element and the terminating NULL.  */
  vec = xrealloc (vec, (old_count + 2) * sizeof (char *));
  vec[old_count] = xstrdup (str);
  vec[old_count + 1] = NULL;
  return vec;
}
1383 \f
/* String "sets" are hash tables in which strings are stored as keys
   merely so that their existence can be queried later.  The routines
   below make using a hash table that way transparent.  */

/* Add a copy of S to the string set HT, unless S is already a
   member.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Checking for membership first means an existing key never has to
     be freed and duplicated again.

     The value stored with every key is the string literal "1": an
     arbitrary but clear marker that costs no extra memory per
     entry.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1404
/* Return whatever hash_table_contains() reports for S in the string
   set HT; this is merely a synonym for that function.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  const int present = hash_table_contains (ht, s);

  return present;
}
1412
1413 /* Convert the specified string set to array.  ARRAY should be large
1414    enough to hold hash_table_count(ht) char pointers.  */
1415
1416 void string_set_to_array (struct hash_table *ht, char **array)
1417 {
1418   hash_table_iterator iter;
1419   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1420     *array++ = iter.key;
1421 }
1422
1423 /* Free the string set.  This frees both the storage allocated for
1424    keys and the actual hash table.  (hash_table_destroy would only
1425    destroy the hash table.)  */
1426
1427 void
1428 string_set_free (struct hash_table *ht)
1429 {
1430   hash_table_iterator iter;
1431   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1432     xfree (iter.key);
1433   hash_table_destroy (ht);
1434 }
1435
1436 /* Utility function: simply call xfree() on all keys and values of HT.  */
1437
1438 void
1439 free_keys_and_values (struct hash_table *ht)
1440 {
1441   hash_table_iterator iter;
1442   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1443     {
1444       xfree (iter.key);
1445       xfree (iter.value);
1446     }
1447 }
1448 \f
/* Store the digit grouping data for thousand separators into *SEP
   (the separator string) and *GROUPING (the grouping info).  The data
   is obtained from localeconv() on the first call and cached for all
   later calls.

   In locales that don't set a thousand separator (such as the "C"
   locale), the separator is forced to "," -- or to "." when "," is
   the decimal point -- with groups of three digits.  Thousand
   separators are shown in only one place, so this shouldn't be a
   problem in practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *cached_sep;
  static const char *cached_grouping;
  static bool initialized;

  if (!initialized)
    {
      /* Start from what the locale says.  */
      const struct lconv *loc = localeconv ();
      const char *locale_sep = loc->thousands_sep;
      const char *locale_grouping = loc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths can't be counted, so a separator longer than
         one byte is dropped here and replaced by the default
         below.  */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif

      if (*locale_sep == '\0')
        {
          /* No usable separator (as in the "C" or "hr_HR" locales),
             yet grouping is still wanted for legibility.  Use ','
             unless it already serves as the decimal point, in which
             case use '.'.  */
          locale_sep = (*loc->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      cached_sep = locale_sep;
      cached_grouping = locale_grouping;
      initialized = true;
    }

  *sep = cached_sep;
  *grouping = cached_grouping;
}
1494
1495 /* Return a printed representation of N with thousand separators.
1496    This should respect locale settings, with the exception of the "C"
1497    locale which mandates no separator, but we use one anyway.
1498
1499    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1500    the separators because it's too non-portable, and it's hard to test
1501    for this feature at configure time.  Besides, it wouldn't display
1502    separators in the "C" locale, still used by many Unix users.  */
1503
1504 const char *
1505 with_thousand_seps (wgint n)
1506 {
1507   static char outbuf[48];
1508   char *p = outbuf + sizeof outbuf;
1509
1510   /* Info received from locale */
1511   const char *grouping, *sep;
1512   int seplen;
1513
1514   /* State information */
1515   int i = 0, groupsize;
1516   const char *atgroup;
1517
1518   bool negative = n < 0;
1519
1520   /* Initialize grouping data. */
1521   get_grouping_data (&sep, &grouping);
1522   seplen = strlen (sep);
1523   atgroup = grouping;
1524   groupsize = *atgroup++;
1525
1526   /* This would overflow on WGINT_MIN, but printing negative numbers
1527      is not an important goal of this fuinction.  */
1528   if (negative)
1529     n = -n;
1530
1531   /* Write the number into the buffer, backwards, inserting the
1532      separators as necessary.  */
1533   *--p = '\0';
1534   while (1)
1535     {
1536       *--p = n % 10 + '0';
1537       n /= 10;
1538       if (n == 0)
1539         break;
1540       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1541       if (++i == groupsize)
1542         {
1543           if (seplen == 1)
1544             *--p = *sep;
1545           else
1546             memcpy (p -= seplen, sep, seplen);
1547           i = 0;
1548           if (*atgroup)
1549             groupsize = *atgroup++;
1550         }
1551     }
1552   if (negative)
1553     *--p = '-';
1554
1555   return p;
1556 }
1557
1558 /* N, a byte quantity, is converted to a human-readable abberviated
1559    form a la sizes printed by `ls -lh'.  The result is written to a
1560    static buffer, a pointer to which is returned.
1561
1562    Unlike `with_thousand_seps', this approximates to the nearest unit.
1563    Quoting GNU libit: "Most people visually process strings of 3-4
1564    digits effectively, but longer strings of digits are more prone to
1565    misinterpretation.  Hence, converting to an abbreviated form
1566    usually improves readability."
1567
1568    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1569    original computer-related meaning of "powers of 1024".  We don't
1570    use the "*bibyte" names invented in 1998, and seldom used in
1571    practice.  Wikipedia's entry on "binary prefix" discusses this in
1572    some detail.  */
1573
1574 char *
1575 human_readable (HR_NUMTYPE n)
1576 {
1577   /* These suffixes are compatible with those of GNU `ls -lh'. */
1578   static char powers[] =
1579     {
1580       'K',                      /* kilobyte, 2^10 bytes */
1581       'M',                      /* megabyte, 2^20 bytes */
1582       'G',                      /* gigabyte, 2^30 bytes */
1583       'T',                      /* terabyte, 2^40 bytes */
1584       'P',                      /* petabyte, 2^50 bytes */
1585       'E',                      /* exabyte,  2^60 bytes */
1586     };
1587   static char buf[8];
1588   size_t i;
1589
1590   /* If the quantity is smaller than 1K, just print it. */
1591   if (n < 1024)
1592     {
1593       snprintf (buf, sizeof (buf), "%d", (int) n);
1594       return buf;
1595     }
1596
1597   /* Loop over powers, dividing N with 1024 in each iteration.  This
1598      works unchanged for all sizes of wgint, while still avoiding
1599      non-portable `long double' arithmetic.  */
1600   for (i = 0; i < countof (powers); i++)
1601     {
1602       /* At each iteration N is greater than the *subsequent* power.
1603          That way N/1024.0 produces a decimal number in the units of
1604          *this* power.  */
1605       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1606         {
1607           double val = n / 1024.0;
1608           /* Print values smaller than 10 with one decimal digits, and
1609              others without any decimals.  */
1610           snprintf (buf, sizeof (buf), "%.*f%c",
1611                     val < 10 ? 1 : 0, val, powers[i]);
1612           return buf;
1613         }
1614       n /= 1024;
1615     }
1616   return NULL;                  /* unreached */
1617 }
1618
1619 /* Count the digits in the provided number.  Used to allocate space
1620    when printing numbers.  */
1621
1622 int
1623 numdigit (wgint number)
1624 {
1625   int cnt = 1;
1626   if (number < 0)
1627     ++cnt;                      /* accomodate '-' */
1628   while ((number /= 10) != 0)
1629     ++cnt;
1630   return cnt;
1631 }
1632
1633 #define PR(mask) *p++ = n / (mask) + '0'
1634
1635 /* DIGITS_<D> is used to print a D-digit number and should be called
1636    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1637    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1638    Recursively this continues until DIGITS_1 is invoked.  */
1639
1640 #define DIGITS_1(mask) PR (mask)
1641 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1642 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1643 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1644 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1645 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1646 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1647 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1648 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1649 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1650
1651 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1652
1653 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1654 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1655 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1656 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1657 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1658 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1659 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1660 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1661 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1662
1663 /* Shorthand for casting to wgint. */
1664 #define W wgint
1665
1666 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1667    `sprintf(buffer, "%lld", (long long) number)', only typically much
1668    faster and portable to machines without long long.
1669
1670    The speedup may make a difference in programs that frequently
1671    convert numbers to strings.  Some implementations of sprintf,
1672    particularly the one in some versions of GNU libc, have been known
1673    to be quite slow when converting integers to strings.
1674
1675    Return the pointer to the location where the terminating zero was
1676    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1677    function is done.)
1678
1679    BUFFER should be large enough to accept as many bytes as you expect
1680    the number to take up.  On machines with 64-bit wgints the maximum
1681    needed size is 24 bytes.  That includes the digits needed for the
1682    largest 64-bit number, the `-' sign in case it's negative, and the
1683    terminating '\0'.  */
1684
1685 char *
1686 number_to_string (char *buffer, wgint number)
1687 {
1688   char *p = buffer;
1689   wgint n = number;
1690
1691   int last_digit_char = 0;
1692
1693 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1694   /* We are running in a very strange environment.  Leave the correct
1695      printing to sprintf.  */
1696   p += sprintf (buf, "%j", (intmax_t) (n));
1697 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1698
1699   if (n < 0)
1700     {
1701       if (n < -WGINT_MAX)
1702         {
1703           /* n = -n would overflow because -n would evaluate to a
1704              wgint value larger than WGINT_MAX.  Need to make n
1705              smaller and handle the last digit separately.  */
1706           int last_digit = n % 10;
1707           /* The sign of n%10 is implementation-defined. */
1708           if (last_digit < 0)
1709             last_digit_char = '0' - last_digit;
1710           else
1711             last_digit_char = '0' + last_digit;
1712           /* After n is made smaller, -n will not overflow. */
1713           n /= 10;
1714         }
1715
1716       *p++ = '-';
1717       n = -n;
1718     }
1719
1720   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1721      way printing any N is fully open-coded without a loop or jump.
1722      (Also see description of DIGITS_*.)  */
1723
1724   if      (n < 10)                       DIGITS_1 (1);
1725   else if (n < 100)                      DIGITS_2 (10);
1726   else if (n < 1000)                     DIGITS_3 (100);
1727   else if (n < 10000)                    DIGITS_4 (1000);
1728   else if (n < 100000)                   DIGITS_5 (10000);
1729   else if (n < 1000000)                  DIGITS_6 (100000);
1730   else if (n < 10000000)                 DIGITS_7 (1000000);
1731   else if (n < 100000000)                DIGITS_8 (10000000);
1732   else if (n < 1000000000)               DIGITS_9 (100000000);
1733 #if SIZEOF_WGINT == 4
1734   /* wgint is 32 bits wide: no number has more than 10 digits. */
1735   else                                   DIGITS_10 (1000000000);
1736 #else
1737   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1738      Constants are constructed by compile-time multiplication to avoid
1739      dealing with different notations for 64-bit constants
1740      (nL/nLL/nI64, depending on the compiler and architecture).  */
1741   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1742   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1743   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1744   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1745   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1746   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1747   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1748   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1749   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1750   else                                   DIGITS_19 (1000000000*(W)1000000000);
1751 #endif
1752
1753   if (last_digit_char)
1754     *p++ = last_digit_char;
1755
1756   *p = '\0';
1757 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1758
1759   return p;
1760 }
1761
1762 #undef PR
1763 #undef W
1764 #undef SPRINTF_WGINT
1765 #undef DIGITS_1
1766 #undef DIGITS_2
1767 #undef DIGITS_3
1768 #undef DIGITS_4
1769 #undef DIGITS_5
1770 #undef DIGITS_6
1771 #undef DIGITS_7
1772 #undef DIGITS_8
1773 #undef DIGITS_9
1774 #undef DIGITS_10
1775 #undef DIGITS_11
1776 #undef DIGITS_12
1777 #undef DIGITS_13
1778 #undef DIGITS_14
1779 #undef DIGITS_15
1780 #undef DIGITS_16
1781 #undef DIGITS_17
1782 #undef DIGITS_18
1783 #undef DIGITS_19
1784
1785 #define RING_SIZE 3
1786
1787 /* Print NUMBER to a statically allocated string and return a pointer
1788    to the printed representation.
1789
1790    This function is intended to be used in conjunction with printf.
1791    It is hard to portably print wgint values:
1792     a) you cannot use printf("%ld", number) because wgint can be long
1793        long on 32-bit machines with LFS.
1794     b) you cannot use printf("%lld", number) because NUMBER could be
1795        long on 32-bit machines without LFS, or on 64-bit machines,
1796        which do not require LFS.  Also, Windows doesn't support %lld.
1797     c) you cannot use printf("%j", (int_max_t) number) because not all
1798        versions of printf support "%j", the most notable being the one
1799        on Windows.
1800     d) you cannot #define WGINT_FMT to the appropriate format and use
1801        printf(WGINT_FMT, number) because that would break translations
1802        for user-visible messages, such as printf("Downloaded: %d
1803        bytes\n", number).
1804
1805    What you should use instead is printf("%s", number_to_static_string
1806    (number)).
1807
1808    CAVEAT: since the function returns pointers to static data, you
1809    must be careful to copy its result before calling it again.
1810    However, to make it more useful with printf, the function maintains
1811    an internal ring of static buffers to return.  That way things like
1812    printf("%s %s", number_to_static_string (num1),
1813    number_to_static_string (num2)) work as expected.  Three buffers
1814    are currently used, which means that "%s %s %s" will work, but "%s
1815    %s %s %s" won't.  If you need to print more than three wgints,
1816    bump the RING_SIZE (or rethink your message.)  */
1817
1818 char *
1819 number_to_static_string (wgint number)
1820 {
1821   static char ring[RING_SIZE][24];
1822   static int ringpos;
1823   char *buf = ring[ringpos];
1824   number_to_string (buf, number);
1825   ringpos = (ringpos + 1) % RING_SIZE;
1826   return buf;
1827 }
1828 \f
/* Return the width, in columns, of the terminal attached to stderr,
   or 0 when it cannot be determined.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* No width is reported when opt.lfilename is set.  */
  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO csbi;

  return GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi)
    ? csbi.dwSize.X : 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1858 \f
/* Nonzero once the random number generator (rand or the [dl]rand48
   family) has been seeded.  */
static int rnd_seeded;

/* Return a pseudo-random integer in the range [0, MAX-1].

   The generator is seeded on the first call, from the current time
   XORed with the process ID.  lrand48 is used when HAVE_DRAND48 is
   defined, rand otherwise.  In the rand case the result cannot exceed
   RAND_MAX, so when MAX is greater than RAND_MAX+1 the values returned
   stay within [0, RAND_MAX].

   DO NOT use this for cryptography; it is meant only for situations
   where the quality of the random numbers does not really matter.  */

int
random_number (int max)
{
  if (!rnd_seeded)
    {
#ifdef HAVE_DRAND48
      srand48 ((long) time (NULL) ^ (long) getpid ());
#else
      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
#endif
      rnd_seeded = 1;
    }

#ifdef HAVE_DRAND48
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */
  {
    /* Scale rand()'s result into [0, MAX) instead of taking it modulo
       MAX: scaling depends on the high-order bits, which are the
       better ones when rand() is a simple congruential generator.  */
    const int raw = rand ();
    const double scaled = (double) max * raw / (RAND_MAX + 1.0);
    return (int) scaled;
  }
#endif /* not HAVE_DRAND48 */
}
1905
/* Return a pseudo-random floating point number in the [0, 1) range.
   drand48 is used when HAVE_DRAND48 is defined; otherwise the value is
   pieced together from four random_number calls, which is a kludge.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (!rnd_seeded)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Treat four draws from [0, 9999] as successive base-10000 "digits"
     after the decimal point.  */
  const double base = 10000.0;
  const double digit1 = random_number (10000) / base;
  const double digit2 = random_number (10000) / (base * base);
  const double digit3 = random_number (10000) / (base * base * base);
  const double digit4 = random_number (10000) / (base * base * base * base);

  return digit1 + digit2 + digit3 + digit4;
#endif /* not HAVE_DRAND48 */
}
1927 \f
1928 /* Implementation of run_with_timeout, a generic timeout-forcing
1929    routine for systems with Unix-like signal handling.  */
1930
1931 #ifdef USE_SIGNAL_TIMEOUT
1932 # ifdef HAVE_SIGSETJMP
1933 #  define SETJMP(env) sigsetjmp (env, 1)
1934
1935 static sigjmp_buf run_with_timeout_env;
1936
1937 static void
1938 abort_run_with_timeout (int sig)
1939 {
1940   assert (sig == SIGALRM);
1941   siglongjmp (run_with_timeout_env, -1);
1942 }
1943 # else /* not HAVE_SIGSETJMP */
1944 #  define SETJMP(env) setjmp (env)
1945
1946 static jmp_buf run_with_timeout_env;
1947
1948 static void
1949 abort_run_with_timeout (int sig)
1950 {
1951   assert (sig == SIGALRM);
1952   /* We don't have siglongjmp to preserve the set of blocked signals;
1953      if we longjumped out of the handler at this point, SIGALRM would
1954      remain blocked.  We must unblock it manually. */
1955   sigset_t set;
1956   sigemptyset (&set);
1957   sigaddset (&set, SIGALRM);
1958   sigprocmask (SIG_BLOCK, &set, NULL);
1959
1960   /* Now it's safe to longjump. */
1961   longjmp (run_with_timeout_env, -1);
1962 }
1963 # endif /* not HAVE_SIGSETJMP */
1964
/* Schedule delivery of SIGALRM after TIMEOUT seconds, using setitimer
   when ITIMER_REAL is available and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would round
   down to zero is raised to the smallest legal value instead (1us
   with setitimer, 1s with alarm), so that SIGALRM is delivered in
   every case.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval timer;
  const long whole_secs = (long) timeout;

  xzero (timer);
  timer.it_value.tv_sec = whole_secs;
  timer.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* An all-zero value would mean "wait forever", so make sure at
     least the minimum interval is requested.  */
  if (timer.it_value.tv_sec == 0 && timer.it_value.tv_usec == 0)
    timer.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &timer, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  alarm(0) means "never deliver
     the alarm", which is not what someone asking for a 0.5s timeout
     expects, so timeouts below one second are rounded up to 1.  */
  const int secs = (int) timeout;

  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
1999
/* Disarm the timer started by alarm_set, through the same interface
   alarm_set used: an all-zero itimerval with setitimer, or alarm(0).  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  struct itimerval off;

  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2013
2014 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2015    seconds.  Returns true if the function was interrupted with a
2016    timeout, false otherwise.
2017
2018    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2019    using setitimer() or alarm().  The timeout is enforced by
2020    longjumping out of the SIGALRM handler.  This has several
2021    advantages compared to the traditional approach of relying on
2022    signals causing system calls to exit with EINTR:
2023
2024      * The callback function is *forcibly* interrupted after the
2025        timeout expires, (almost) regardless of what it was doing and
2026        whether it was in a syscall.  For example, a calculation that
2027        takes a long time is interrupted as reliably as an IO
2028        operation.
2029
2030      * It works with both SYSV and BSD signals because it doesn't
2031        depend on the default setting of SA_RESTART.
2032
2033      * It doesn't require special handler setup beyond a simple call
2034        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2035        optional.)
2036
2037    The only downside is that, if FUN allocates internal resources that
2038    are normally freed prior to exit from the functions, they will be
2039    lost in case of timeout.  */
2040
2041 bool
2042 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2043 {
2044   int saved_errno;
2045
2046   if (timeout == 0)
2047     {
2048       fun (arg);
2049       return false;
2050     }
2051
2052   signal (SIGALRM, abort_run_with_timeout);
2053   if (SETJMP (run_with_timeout_env) != 0)
2054     {
2055       /* Longjumped out of FUN with a timeout. */
2056       signal (SIGALRM, SIG_DFL);
2057       return true;
2058     }
2059   alarm_set (timeout);
2060   fun (arg);
2061
2062   /* Preserve errno in case alarm() or signal() modifies it. */
2063   saved_errno = errno;
2064   alarm_cancel ();
2065   signal (SIGALRM, SIG_DFL);
2066   errno = saved_errno;
2067
2068   return false;
2069 }
2070
2071 #else  /* not USE_SIGNAL_TIMEOUT */
2072
#ifndef WINDOWS
/* Stand-in for run_with_timeout on builds without signal-based
   timeouts: FUN(ARG) runs to completion, TIMEOUT is not consulted, and
   the result is always false.  It is not defined under Windows,
   because Windows has its own version of run_with_timeout that uses
   threads.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;
  (*fun) (arg);
  return false;
}
#endif /* not WINDOWS */
2085 #endif /* not USE_SIGNAL_TIMEOUT */
2086 \f
#ifndef WINDOWS

/* Suspend execution for SECONDS seconds; fractional values are
   honored.  On machines without nanosleep(), the sleep may end early
   when a signal arrives.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface: it is accurate and, more
     importantly, it reports the unslept time, so the sleep can be
     reliably resumed after a signal such as SIGWINCH.  (There was an
     actual Debian bug report about --limit-rate malfunctioning while
     the terminal was being resized.)  */
  struct timespec request, left;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);
  for (;;)
    {
      if (nanosleep (&request, &left) >= 0 || errno != EINTR)
        break;
      /* Interrupted by a signal: go back to sleep for whatever time
         is still left.  */
      request = left;
    }
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  On some
     systems usleep cannot handle values larger than 1,000,000, so
     whole seconds are slept with sleep and only the sub-second rest
     is handed to usleep.  */
  if (seconds >= 1)
    {
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval wait;

  wait.tv_sec = (long) seconds;
  wait.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &wait);
  /* A return of -1 with errno EINTR means a signal interrupted us.
     Without knowing how long we actually slept we can't go back to
     sleep, and tracking that with gettimeofday is slow and unreliable
     due to clock skew.  */
#endif
}

#endif /* not WINDOWS */
2136
/* Encode the octets in DATA of length LENGTH to base64 format,
   storing the result to DEST.  The output will be zero-terminated,
   and DEST must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes, i.e. four characters for every
   started group of three input octets plus the terminating zero.
   The function returns the length of the resulting base64 data, not
   counting the terminating zero.  A LENGTH of zero (or less) yields
   an empty string and does not access DATA.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  char *p = dest;
  /* Number of input octets not yet encoded.  Counting down, rather
     than comparing against an end pointer of DATA + LENGTH - 2, avoids
     forming a pointer in front of the array when LENGTH < 3.  */
  int left = length;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing octets, padding with '='...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* Nothing left over: no padding needed.  */
      break;
    }
  /* ...and zero-terminate the result.  */
  *p = '\0';

  return p - dest;
}
2193
/* Store in C the next non-whitespace character read through P,
   advancing P past it; C is \0 when the end of the string is
   reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode BASE64 (a null-terminated string of base64 data) into the
   memory pointed to by DEST.  DEST is assumed to be large enough to
   accommodate the decoded data, which is guaranteed to be no more
   than 3/4*strlen(base64).  Whitespace in the input is skipped.

   Since DEST is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the data
   written to DEST, or -1 when the base64 input is malformed.

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')

  const char *in = base64;
  char *out = dest;

  /* Each iteration consumes one quadruplet of input characters and
     produces up to three output bytes.  */
  for (;;)
    {
      unsigned char ch;
      unsigned long bits;

      /* First character: the only place where the input may end.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        break;
      if (ch == '=' || !IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */
      bits = BASE64_CHAR_TO_VALUE (ch) << 18;

      /* Second character: completes the first output byte.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        return -1;              /* premature EOF while decoding base64 */
      if (ch == '=' || !IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */
      bits |= BASE64_CHAR_TO_VALUE (ch) << 12;
      *out++ = bits >> 16;

      /* Third character: either data or the start of "==" padding.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        return -1;              /* premature EOF while decoding base64 */
      if (!IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */

      if (ch == '=')
        {
          /* A second `=' must follow; running into the end of the
             string here is an error as well.  */
          NEXT_CHAR (ch, in);
          if (ch != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }

      bits |= BASE64_CHAR_TO_VALUE (ch) << 6;
      *out++ = 0xff & (bits >> 8);

      /* Fourth character: either data or a single `=' of padding.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        return -1;              /* premature EOF while decoding base64 */
      if (ch == '=')
        continue;
      if (!IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */

      bits |= BASE64_CHAR_TO_VALUE (ch);
      *out++ = 0xff & bits;
    }
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE

  return out - (char *) dest;
}

#undef IS_ASCII
#undef NEXT_CHAR
2303 \f
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements of BASE with indices FROM through TO, both
   inclusive, each SIZE bytes wide, in the order defined by CMPFUN.
   TEMP is scratch space that is indexed the same way as BASE, so it
   must have room for at least TO + 1 elements.  Equal elements keep
   their relative order.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way so that the midpoint cannot overflow.  */
      size_t mid = from + (to - from) / 2;

      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);

      /* Merge the two sorted halves into TEMP.  On a tie the element
         from the left half goes first, which is what makes the sort
         stable.  */
      i = from;
      j = mid + 1;
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);

      /* Copy the merged run back to where it belongs.  */
      memcpy (ELT (base, from), ELT (temp, from), (to - from + 1) * size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort:
   BASE points to NMEMB elements of SIZE bytes each, ordered with
   CMPFUN.  Uses mergesort internally, allocating temporary storage of
   NMEMB * SIZE bytes with alloca, so it is only suitable for arrays
   that comfortably fit on the stack.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* Arrays of fewer than two elements are sorted already.  The test
     has to be on NMEMB: with an empty array NMEMB - 1 would wrap
     around to a huge index, and elements that are a single byte wide
     are as sortable as any others.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2349 \f
/* Format NUMBER as a short decimal string.  Values whose magnitude is
   ten or more (in fact 9.95 or more) are rounded to an integer.
   Smaller ones are printed with one significant digit, without
   trailing zeros and with no more than three fractional digits:
   0.1 gives "0.1", 0.035 gives "0.04", 0.0091 gives "0.009", and
   0.0003 gives just "0".

   This suits durations: it conveys the order of magnitude without
   clutter -- long-running downloads are shown without a fractional
   part, while short ones still keep one significant digit.

   The result lives in a static buffer that the next call
   overwrites.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  const double magnitude = number >= 0 ? number : -number;

  /* The thresholds are tested from the largest down, and each range
     picks its own format.  */
  if (magnitude >= 9.95)
    {
      /* The cut-off is 9.95 because the %.1f below would turn 9.96
         into "10.0" rather than "10".  OTOH 9.94 prints as "9.9".  */
      snprintf (buf, sizeof buf, "%.0f", number);
      return buf;
    }

  if (magnitude >= 0.95)
    {
      snprintf (buf, sizeof buf, "%.1f", number);
      return buf;
    }

  if (magnitude >= 0.001)
    {
      snprintf (buf, sizeof buf, "%.1g", number);
      return buf;
    }

  if (magnitude >= 0.0005)
    {
      /* round [0.0005, 0.001) to 0.001 */
      snprintf (buf, sizeof buf, "%.3f", number);
      return buf;
    }

  /* print numbers close to 0 as 0, not 0.000 */
  strcpy (buf, "0");
  return buf;
}
2384
2385 #ifdef TESTING
2386
/* Unit test for subdir_p: run it on each pair of directories in the
   table and compare against the expected answer.  Returns NULL after
   all cases have been checked; a mismatch is reported through
   mu_assert.  */

const char *
test_subdir_p()
{
  struct {
    char *d1;
    char *d2;
    bool expected;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  int n = 0;

  while (n < countof(cases))
    {
      bool got = subdir_p (cases[n].d1, cases[n].d2);

      mu_assert ("test_subdir_p: wrong result",
                 got == cases[n].expected);
      ++n;
    }

  return NULL;
}
2411
/* Unit test for dir_matches_p: match each directory in the table
   against its NULL-terminated pattern list and compare against the
   expected answer.  Returns NULL after all cases have been checked; a
   mismatch is reported through mu_assert.  */

const char *
test_dir_matches_p()
{
  struct {
    char *patterns[3];
    char *dir;
    bool expected;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  int n = 0;

  while (n < countof(cases))
    {
      bool got = dir_matches_p (cases[n].patterns, cases[n].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == cases[n].expected);
      ++n;
    }

  return NULL;
}
2448
2449 #endif /* TESTING */
2450