sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #include <errno.h>
  46 #include <fcntl.h>
  47 #include <assert.h>
  48 #include <stdarg.h>
  49 #include <locale.h>
  50
  51 #if HAVE_UTIME
  52 # include <sys/types.h>
  53 # ifdef HAVE_UTIME_H
  54 #  include <utime.h>
  55 # endif
  56
  57 # ifdef HAVE_SYS_UTIME_H
  58 #  include <sys/utime.h>
  59 # endif
  60 #endif
  61
  62 #include <sys/stat.h>
  63
  64 /* For TIOCGWINSZ and friends: */
  65 #include <sys/ioctl.h>
  66 #ifdef HAVE_TERMIOS_H
  67 # include <termios.h>
  68 #endif
  69
  70 /* Needed for Unix version of run_with_timeout. */
  71 #include <signal.h>
  72 #include <setjmp.h>
  73
  74 #ifndef HAVE_SIGSETJMP
  75 /* If sigsetjmp is a macro, configure won't pick it up. */
  76 # ifdef sigsetjmp
  77 #  define HAVE_SIGSETJMP
  78 # endif
  79 #endif
  80
  81 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  82 # define USE_SIGNAL_TIMEOUT
  83 #endif
  84
  85 #include "utils.h"
  86 #include "hash.h"
  87
  88 #ifdef __VMS
  89 #include "vms.h"
  90 #endif /* def __VMS */
  91
  92 #ifdef TESTING
  93 #include "test.h"
  94 #endif
  95
  96 static void
  97 memfatal (const char *context, long attempted_size)
  98 {
  99   /* Make sure we don't try to store part of the log line, and thus
 100      call malloc.  */
 101   log_set_save_context (false);
 102
 103   /* We have different log outputs in different situations:
 104      1) output without bytes information
 105      2) output with bytes information  */
 106   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 107     {
 108       logprintf (LOG_ALWAYS,
 109                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 110                  exec_name, context);
 111     }
 112   else
 113     {
 114       logprintf (LOG_ALWAYS,
 115                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 116                  exec_name, context, attempted_size);
 117     }
 118
 119   exit (1);
 120 }
 121
 122 /* Character property table for (re-)escaping VMS ODS5 extended file
 123    names.  Note that this table ignores Unicode.
 124
 125    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 126
 127    ODS5 Invalid characters:
 128       C0 control codes (0x00 to 0x1F inclusive)
 129       Asterisk (*)
 130       Question mark (?)
 131
 132    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 133       Double quotation marks (")
 134       Backslash (\)
 135       Colon (:)
 136       Left angle bracket (<)
 137       Right angle bracket (>)
 138       Slash (/)
 139       Vertical bar (|)
 140
 141    Characters escaped by "^":
 142       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 143        @  [  \  ]  ^  `  {  |  }  ~
 144
 145    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 146    case.  Note that un-escaped < and > can also confuse a directory
 147    spec.
 148
 149    Characters put out as ^xx:
 150       7F (DEL)
 151       80-9F (C1 control characters)
 152       A0 (nonbreaking space)
 153       FF (Latin small letter y diaeresis)
 154
 155    Other cases:
 156       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 157
 158     Property table values:
 159       Normal escape:    1
 160       Space:            2
 161       Dot:              4
 162       Hex-hex escape:   8
 163       ODS2 normal:     16
 164       ODS2 lower case: 32
 165       Hex digit:       64
 166 */
 167
/* Per-byte property flags, indexed by character code.  Each entry is
   the sum of the "Property table values" listed in the comment above
   (e.g. 80 = ODS2 normal + hex digit, 96 = ODS2 lower case + hex
   digit, 17 = ODS2 normal + normal escape).  */
unsigned char char_prop[ 256] = {

/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,

/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,

/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,

/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,

/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,

/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,

/* 0x80-0x8F: hex-hex escape (C1 control characters) */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0x90-0x9F: hex-hex escape (C1 control characters) */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0xA0 (nonbreaking space): hex-hex escape; 0xA1-0xAF: no properties */
    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0-0xBF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xC0-0xCF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xD0-0xDF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xE0-0xEF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0-0xFE: no properties; 0xFF (y diaeresis): hex-hex escape */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
};
 203
 204 /* Utility function: like xstrdup(), but also lowercases S.  */
 205
 206 char *
 207 xstrdup_lower (const char *s)
 208 {
 209   char *copy = xstrdup (s);
 210   char *p = copy;
 211   for (; *p; p++)
 212     *p = c_tolower (*p);
 213   return copy;
 214 }
 215
 216 /* Copy the string formed by two pointers (one on the beginning, other
 217    on the char after the last char) to a new, malloc-ed location.
 218    0-terminate it.  */
 219 char *
 220 strdupdelim (const char *beg, const char *end)
 221 {
 222   char *res = xmalloc (end - beg + 1);
 223   memcpy (res, beg, end - beg);
 224   res[end - beg] = '\0';
 225   return res;
 226 }
 227
 228 /* Parse a string containing comma-separated elements, and return a
 229    vector of char pointers with the elements.  Spaces following the
 230    commas are ignored.  */
 231 char **
 232 sepstring (const char *s)
 233 {
 234   char **res;
 235   const char *p;
 236   int i = 0;
 237
 238   if (!s || !*s)
 239     return NULL;
 240   res = NULL;
 241   p = s;
 242   while (*s)
 243     {
 244       if (*s == ',')
 245         {
 246           res = xrealloc (res, (i + 2) * sizeof (char *));
 247           res[i] = strdupdelim (p, s);
 248           res[++i] = NULL;
 249           ++s;
 250           /* Skip the blanks following the ','.  */
 251           while (c_isspace (*s))
 252             ++s;
 253           p = s;
 254         }
 255       else
 256         ++s;
 257     }
 258   res = xrealloc (res, (i + 2) * sizeof (char *));
 259   res[i] = strdupdelim (p, s);
 260   res[i + 1] = NULL;
 261   return res;
 262 }
 263 \f
 264 /* Like sprintf, but prints into a string of sufficient size freshly
 265    allocated with malloc, which is returned.  If unable to print due
 266    to invalid format, returns NULL.  Inability to allocate needed
 267    memory results in abort, as with xmalloc.  This is in spirit
 268    similar to the GNU/BSD extension asprintf, but somewhat easier to
 269    use.
 270
 271    Internally the function either calls vasprintf or loops around
 272    vsnprintf until the correct size is found.  Since Wget also ships a
 273    fallback implementation of vsnprintf, this should be portable.  */
 274
/* Upper limit on the size of the text buffer that aprintf will
   allocate.  It applies only when vasprintf is not available and
   vsnprintf returns -1 when a long line is truncated (as in old
   versions of glibc and on other systems whose vsnprintf does not
   conform to C99).  */
 279
 280 #define FMT_MAX_LENGTH 1048576
 281
 282 char *
 283 aprintf (const char *fmt, ...)
 284 {
 285 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 286   /* Use vasprintf. */
 287   int ret;
 288   va_list args;
 289   char *str;
 290   va_start (args, fmt);
 291   ret = vasprintf (&str, fmt, args);
 292   va_end (args);
 293   if (ret < 0 && errno == ENOMEM)
 294     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 295                                                       with xmalloc/xrealloc */
 296   else if (ret < 0)
 297     return NULL;
 298   return str;
 299 #else  /* not HAVE_VASPRINTF */
 300
 301   /* vasprintf is unavailable.  snprintf into a small buffer and
 302      resize it as necessary. */
 303   int size = 32;
 304   char *str = xmalloc (size);
 305
 306   /* #### This code will infloop and eventually abort in xrealloc if
 307      passed a FMT that causes snprintf to consistently return -1.  */
 308
 309   while (1)
 310     {
 311       int n;
 312       va_list args;
 313
 314       va_start (args, fmt);
 315       n = vsnprintf (str, size, fmt, args);
 316       va_end (args);
 317
 318       /* If the printing worked, return the string. */
 319       if (n > -1 && n < size)
 320         return str;
 321
 322       /* Else try again with a larger buffer. */
 323       if (n > -1)               /* C99 */
 324         size = n + 1;           /* precisely what is needed */
 325       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 326         {                               /* maybe we have some wrong
 327                                            format string? */
 328           logprintf (LOG_ALWAYS,
 329                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 330                        "aborting.\n"),
 331                      exec_name, size);  /* printout a log message */
 332           abort ();                     /* and abort... */
 333         }
 334       else
 335         {
 336           /* else, we continue to grow our
 337            * buffer: Twice the old size. */
 338           size <<= 1;
 339         }
 340       str = xrealloc (str, size);
 341     }
 342 #endif /* not HAVE_VASPRINTF */
 343 }
 344
 345 /* Concatenate the NULL-terminated list of string arguments into
 346    freshly allocated space.  */
 347
 348 char *
 349 concat_strings (const char *str0, ...)
 350 {
 351   va_list args;
 352   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 353   char *ret, *p;
 354
 355   const char *next_str;
 356   int total_length = 0;
 357   size_t argcount;
 358
 359   /* Calculate the length of and allocate the resulting string. */
 360
 361   argcount = 0;
 362   va_start (args, str0);
 363   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 364     {
 365       int len = strlen (next_str);
 366       if (argcount < countof (saved_lengths))
 367         saved_lengths[argcount++] = len;
 368       total_length += len;
 369     }
 370   va_end (args);
 371   p = ret = xmalloc (total_length + 1);
 372
 373   /* Copy the strings into the allocated space. */
 374
 375   argcount = 0;
 376   va_start (args, str0);
 377   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 378     {
 379       int len;
 380       if (argcount < countof (saved_lengths))
 381         len = saved_lengths[argcount++];
 382       else
 383         len = strlen (next_str);
 384       memcpy (p, next_str, len);
 385       p += len;
 386     }
 387   va_end (args);
 388   *p = '\0';
 389
 390   return ret;
 391 }
 392 \f
 393 /* Format the provided time according to the specified format.  The
 394    format is a string with format elements supported by strftime.  */
 395
 396 static char *
 397 fmttime (time_t t, const char *fmt)
 398 {
 399   static char output[32];
 400   struct tm *tm = localtime(&t);
 401   if (!tm)
 402     abort ();
 403   if (!strftime(output, sizeof(output), fmt, tm))
 404     abort ();
 405   return output;
 406 }
 407
 408 /* Return pointer to a static char[] buffer in which zero-terminated
 409    string-representation of TM (in form hh:mm:ss) is printed.
 410
 411    If TM is NULL, the current time will be used.  */
 412
 413 char *
 414 time_str (time_t t)
 415 {
 416   return fmttime(t, "%H:%M:%S");
 417 }
 418
 419 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 420
 421 char *
 422 datetime_str (time_t t)
 423 {
 424   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 425 }
 426 \f
 427 /* The Windows versions of the following two functions are defined in
 428    mswindows.c. On MSDOS this function should never be called. */
 429
 430 #ifdef __VMS
 431
 432 void
 433 fork_to_background (void)
 434 {
 435   return;
 436 }
 437
 438 #else /* def __VMS */
 439
 440 #if !defined(WINDOWS) && !defined(MSDOS)
 441 void
 442 fork_to_background (void)
 443 {
 444   pid_t pid;
 445   /* Whether we arrange our own version of opt.lfilename here.  */
 446   bool logfile_changed = false;
 447
 448   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 449     {
 450       /* We must create the file immediately to avoid either a race
 451          condition (which arises from using unique_name and failing to
 452          use fopen_excl) or lying to the user about the log file name
 453          (which arises from using unique_name, printing the name, and
 454          using fopen_excl later on.)  */
 455       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 456       if (new_log_fp)
 457         {
 458           logfile_changed = true;
 459           fclose (new_log_fp);
 460         }
 461     }
 462   pid = fork ();
 463   if (pid < 0)
 464     {
 465       /* parent, error */
 466       perror ("fork");
 467       exit (1);
 468     }
 469   else if (pid != 0)
 470     {
 471       /* parent, no error */
 472       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 473       if (logfile_changed)
 474         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 475       exit (0);                 /* #### should we use _exit()? */
 476     }
 477
 478   /* child: give up the privileges and keep running. */
 479   setsid ();
 480   freopen ("/dev/null", "r", stdin);
 481   freopen ("/dev/null", "w", stdout);
 482   freopen ("/dev/null", "w", stderr);
 483 }
 484 #endif /* !WINDOWS && !MSDOS */
 485
 486 #endif /* def __VMS [else] */
 487
 488 \f
 489 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 490    specified with TM.  The atime ("access time") is set to the current
 491    time.  */
 492
 493 void
 494 touch (const char *file, time_t tm)
 495 {
 496 #if HAVE_UTIME
 497 # ifdef HAVE_STRUCT_UTIMBUF
 498   struct utimbuf times;
 499 # else
 500   struct {
 501     time_t actime;
 502     time_t modtime;
 503   } times;
 504 # endif
 505   times.modtime = tm;
 506   times.actime = time (NULL);
 507   if (utime (file, &times) == -1)
 508     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 509 #else
 510   struct timespec timespecs[2];
 511   int fd;
 512
 513   fd = open (file, O_WRONLY);
 514   if (fd < 0)
 515     {
 516       logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
 517       return;
 518     }
 519
 520   timespecs[0].tv_sec = time (NULL);
 521   timespecs[0].tv_nsec = 0L;
 522   timespecs[1].tv_sec = tm;
 523   timespecs[1].tv_nsec = 0L;
 524
 525   if (futimens (fd, timespecs) == -1)
 526     logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
 527
 528   close (fd);
 529 #endif
 530 }
 531
 532 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 533    nothing under MS-Windows.  */
 534 int
 535 remove_link (const char *file)
 536 {
 537   int err = 0;
 538   struct_stat st;
 539
 540   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 541     {
 542       DEBUGP (("Unlinking %s (symlink).\n", file));
 543       err = unlink (file);
 544       if (err != 0)
 545         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 546                    quote (file), strerror (errno));
 547     }
 548   return err;
 549 }
 550
 551 /* Does FILENAME exist?  This is quite a lousy implementation, since
 552    it supplies no error codes -- only a yes-or-no answer.  Thus it
 553    will return that a file does not exist if, e.g., the directory is
 554    unreadable.  I don't mind it too much currently, though.  The
 555    proper way should, of course, be to have a third, error state,
 556    other than true/false, but that would introduce uncalled-for
 557    additional complexity to the callers.  */
 558 bool
 559 file_exists_p (const char *filename)
 560 {
 561 #ifdef HAVE_ACCESS
 562   return access (filename, F_OK) >= 0;
 563 #else
 564   struct_stat buf;
 565   return stat (filename, &buf) >= 0;
 566 #endif
 567 }
 568
 569 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 570    Returns 0 on error.  */
 571 bool
 572 file_non_directory_p (const char *path)
 573 {
 574   struct_stat buf;
 575   /* Use lstat() rather than stat() so that symbolic links pointing to
 576      directories can be identified correctly.  */
 577   if (lstat (path, &buf) != 0)
 578     return false;
 579   return S_ISDIR (buf.st_mode) ? false : true;
 580 }
 581
 582 /* Return the size of file named by FILENAME, or -1 if it cannot be
 583    opened or seeked into. */
 584 wgint
 585 file_size (const char *filename)
 586 {
 587 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 588   wgint size;
 589   /* We use fseek rather than stat to determine the file size because
 590      that way we can also verify that the file is readable without
 591      explicitly checking for permissions.  Inspired by the POST patch
 592      by Arnaud Wylie.  */
 593   FILE *fp = fopen (filename, "rb");
 594   if (!fp)
 595     return -1;
 596   fseeko (fp, 0, SEEK_END);
 597   size = ftello (fp);
 598   fclose (fp);
 599   return size;
 600 #else
 601   struct_stat st;
 602   if (stat (filename, &st) < 0)
 603     return -1;
 604   return st.st_size;
 605 #endif
 606 }
 607
 608 /* 2005-02-19 SMS.
 609    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 610    original name.  With the VMS file systems' versioning, everything
 611    should be fine, and appending ".NN" just causes trouble.
 612 */
 613
 614 #ifdef UNIQ_SEP
 615
 616 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 617    doesn't exist is found.  Return a freshly allocated copy of the
 618    unused file name.  */
 619
 620 static char *
 621 unique_name_1 (const char *prefix)
 622 {
 623   int count = 1;
 624   int plen = strlen (prefix);
 625   char *template = (char *)alloca (plen + 1 + 24);
 626   char *template_tail = template + plen;
 627
 628   memcpy (template, prefix, plen);
 629   *template_tail++ = UNIQ_SEP;
 630
 631   do
 632     number_to_string (template_tail, count++);
 633   while (file_exists_p (template));
 634
 635   return xstrdup (template);
 636 }
 637
 638 /* Return a unique file name, based on FILE.
 639
 640    More precisely, if FILE doesn't exist, it is returned unmodified.
 641    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 642    file name that doesn't exist is returned.
 643
 644    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 645
 646    The resulting file is not created, only verified that it didn't
 647    exist at the point in time when the function was called.
 648    Therefore, where security matters, don't rely that the file created
 649    by this function exists until you open it with O_EXCL or
 650    equivalent.
 651
 652    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 653    string.  Otherwise, it may return FILE if the file doesn't exist
 654    (and therefore doesn't need changing).  */
 655
 656 char *
 657 unique_name (const char *file, bool allow_passthrough)
 658 {
 659   /* If the FILE itself doesn't exist, return it without
 660      modification. */
 661   if (!file_exists_p (file))
 662     return allow_passthrough ? (char *)file : xstrdup (file);
 663
 664   /* Otherwise, find a numeric suffix that results in unused file name
 665      and return it.  */
 666   return unique_name_1 (file);
 667 }
 668
 669 #else /* def UNIQ_SEP */
 670
 671 /* Dummy unique_name() for VMS.  Return the original name as easily as
 672    possible.
 673 */
 674 char *
 675 unique_name (const char *file, bool allow_passthrough)
 676 {
 677   /* Return the FILE itself, without modification, irregardful. */
 678   return allow_passthrough ? (char *)file : xstrdup (file);
 679 }
 680
 681 #endif /* def UNIQ_SEP [else] */
 682
 683 /* Create a file based on NAME, except without overwriting an existing
 684    file with that name.  Providing O_EXCL is correctly implemented,
 685    this function does not have the race condition associated with
 686    opening the file returned by unique_name.  */
 687
 688 FILE *
 689 unique_create (const char *name, bool binary, char **opened_name)
 690 {
 691   /* unique file name, based on NAME */
 692   char *uname = unique_name (name, false);
 693   FILE *fp;
 694   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 695     {
 696       xfree (uname);
 697       uname = unique_name (name, false);
 698     }
 699   if (opened_name && fp != NULL)
 700     {
 701       if (fp)
 702         *opened_name = uname;
 703       else
 704         {
 705           *opened_name = NULL;
 706           xfree (uname);
 707         }
 708     }
 709   else
 710     xfree (uname);
 711   return fp;
 712 }
 713
 714 /* Open the file for writing, with the addition that the file is
 715    opened "exclusively".  This means that, if the file already exists,
 716    this function will *fail* and errno will be set to EEXIST.  If
 717    BINARY is set, the file will be opened in binary mode, equivalent
 718    to fopen's "wb".
 719
 720    If opening the file fails for any reason, including the file having
 721    previously existed, this function returns NULL and sets errno
 722    appropriately.  */
 723
 724 FILE *
 725 fopen_excl (const char *fname, int binary)
 726 {
 727   int fd;
 728 #ifdef O_EXCL
 729
 730 /* 2005-04-14 SMS.
 731    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 732    It also has file versions which obviate all the O_EXCL effort.
 733    O_TRUNC (something of a misnomer) requests a new version.
 734 */
 735 # ifdef __VMS
 736 /* Common open() optional arguments:
 737    sequential access only, access callback function.
 738 */
 739 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 740
 741   int open_id;
 742   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 743
 744   if (binary > 1)
 745     {
 746       open_id = 11;
 747       fd = open( fname,                 /* File name. */
 748        flags,                           /* Flags. */
 749        0777,                            /* Mode for default protection. */
 750        "ctx=bin,stm",                   /* Binary, stream access. */
 751        "rfm=stmlf",                     /* Stream_LF. */
 752        OPEN_OPT_ARGS);                  /* Access callback. */
 753     }
 754   else if (binary)
 755     {
 756       open_id = 12;
 757       fd = open( fname,                 /* File name. */
 758        flags,                           /* Flags. */
 759        0777,                            /* Mode for default protection. */
 760        "ctx=bin,stm",                   /* Binary, stream access. */
 761        "rfm=fix",                       /* Fixed-length, */
 762        "mrs=512",                       /* 512-byte records. */
 763        OPEN_OPT_ARGS);                  /* Access callback. */
 764     }
 765   else
 766     {
 767       open_id = 13;
 768       fd = open( fname,                 /* File name. */
 769        flags,                           /* Flags. */
 770        0777,                            /* Mode for default protection. */
 771        "rfm=stmlf",                     /* Stream_LF. */
 772        OPEN_OPT_ARGS);                  /* Access callback. */
 773     }
 774 # else /* def __VMS */
 775   int flags = O_WRONLY | O_CREAT | O_EXCL;
 776 # ifdef O_BINARY
 777   if (binary)
 778     flags |= O_BINARY;
 779 # endif
 780   fd = open (fname, flags, 0666);
 781 # endif /* def __VMS [else] */
 782
 783   if (fd < 0)
 784     return NULL;
 785   return fdopen (fd, binary ? "wb" : "w");
 786 #else  /* not O_EXCL */
 787   /* Manually check whether the file exists.  This is prone to race
 788      conditions, but systems without O_EXCL haven't deserved
 789      better.  */
 790   if (file_exists_p (fname))
 791     {
 792       errno = EEXIST;
 793       return NULL;
 794     }
 795   return fopen (fname, binary ? "wb" : "w");
 796 #endif /* not O_EXCL */
 797 }
 798 \f
 799 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 800    are missing, create them first.  In case any mkdir() call fails,
 801    return its error status.  Returns 0 on successful completion.
 802
 803    The behaviour of this function should be identical to the behaviour
 804    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 805 int
 806 make_directory (const char *directory)
 807 {
 808   int i, ret, quit = 0;
 809   char *dir;
 810
 811   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 812      function is unsafe if called with a read-only char *argument.  */
 813   STRDUP_ALLOCA (dir, directory);
 814
 815   /* If the first character of dir is '/', skip it (and thus enable
 816      creation of absolute-pathname directories.  */
 817   for (i = (*dir == '/'); 1; ++i)
 818     {
 819       for (; dir[i] && dir[i] != '/'; i++)
 820         ;
 821       if (!dir[i])
 822         quit = 1;
 823       dir[i] = '\0';
 824       /* Check whether the directory already exists.  Allow creation of
 825          of intermediate directories to fail, as the initial path components
 826          are not necessarily directories!  */
 827       if (!file_exists_p (dir))
 828         ret = mkdir (dir, 0777);
 829       else
 830         ret = 0;
 831       if (quit)
 832         break;
 833       else
 834         dir[i] = '/';
 835     }
 836   return ret;
 837 }
 838
 839 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 840    should be a file name.
 841
 842    file_merge("/foo/bar", "baz")  => "/foo/baz"
 843    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 844    file_merge("foo", "bar")       => "bar"
 845
 846    In other words, it's a simpler and gentler version of uri_merge.  */
 847
 848 char *
 849 file_merge (const char *base, const char *file)
 850 {
 851   char *result;
 852   const char *cut = (const char *)strrchr (base, '/');
 853
 854   if (!cut)
 855     return xstrdup (file);
 856
 857   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 858   memcpy (result, base, cut - base);
 859   result[cut - base] = '/';
 860   strcpy (result + (cut - base) + 1, file);
 861
 862   return result;
 863 }
 864 \f
 865 /* Like fnmatch, but performs a case-insensitive match.  */
 866
 867 int
 868 fnmatch_nocase (const char *pattern, const char *string, int flags)
 869 {
 870 #ifdef FNM_CASEFOLD
 871   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 872      also present on *BSD platforms, and possibly elsewhere.  */
 873   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 874 #else
 875   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 876   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 877   char *strcopy = (char *) alloca (strlen (string) + 1);
 878   char *p;
 879   for (p = patcopy; *pattern; pattern++, p++)
 880     *p = c_tolower (*pattern);
 881   *p = '\0';
 882   for (p = strcopy; *string; string++, p++)
 883     *p = c_tolower (*string);
 884   *p = '\0';
 885   return fnmatch (patcopy, strcopy, flags);
 886 #endif
 887 }
 888
 889 static bool in_acclist (const char *const *, const char *, bool);
 890
 891 /* Determine whether a file is acceptable to be followed, according to
 892    lists of patterns to accept/reject.  */
 893 bool
 894 acceptable (const char *s)
 895 {
 896   int l = strlen (s);
 897
 898   if (opt.output_document && strcmp (s, opt.output_document) == 0)
 899     return true;
 900
 901   while (l && s[l] != '/')
 902     --l;
 903   if (s[l] == '/')
 904     s += (l + 1);
 905   if (opt.accepts)
 906     {
 907       if (opt.rejects)
 908         return (in_acclist ((const char *const *)opt.accepts, s, true)
 909                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 910       else
 911         return in_acclist ((const char *const *)opt.accepts, s, true);
 912     }
 913   else if (opt.rejects)
 914     return !in_acclist ((const char *const *)opt.rejects, s, true);
 915   return true;
 916 }
 917
 918 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 919    will return true if and only if D2 begins with `/something/' or is exactly
 920    '/something'.  */
 921 bool
 922 subdir_p (const char *d1, const char *d2)
 923 {
 924   if (*d1 == '\0')
 925     return true;
 926   if (!opt.ignore_case)
 927     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 928       ;
 929   else
 930     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 931       ;
 932
 933   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 934 }
 935
 936 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 937    first element that matches DIR, through wildcards or front comparison (as
 938    appropriate).  */
 939 static bool
 940 dir_matches_p (char **dirlist, const char *dir)
 941 {
 942   char **x;
 943   int (*matcher) (const char *, const char *, int)
 944     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 945
 946   for (x = dirlist; *x; x++)
 947     {
 948       /* Remove leading '/' */
 949       char *p = *x + (**x == '/');
 950       if (has_wildcards_p (p))
 951         {
 952           if (matcher (p, dir, FNM_PATHNAME) == 0)
 953             break;
 954         }
 955       else
 956         {
 957           if (subdir_p (p, dir))
 958             break;
 959         }
 960     }
 961
 962   return *x ? true : false;
 963 }
 964
 965 /* Returns whether DIRECTORY is acceptable for download, wrt the
 966    include/exclude lists.
 967
 968    The leading `/' is ignored in paths; relative and absolute paths
 969    may be freely intermixed.  */
 970
 971 bool
 972 accdir (const char *directory)
 973 {
 974   /* Remove starting '/'.  */
 975   if (*directory == '/')
 976     ++directory;
 977   if (opt.includes)
 978     {
 979       if (!dir_matches_p (opt.includes, directory))
 980         return false;
 981     }
 982   if (opt.excludes)
 983     {
 984       if (dir_matches_p (opt.excludes, directory))
 985         return false;
 986     }
 987   return true;
 988 }
 989
 990 /* Return true if STRING ends with TAIL.  For instance:
 991
 992    match_tail ("abc", "bc", false)  -> 1
 993    match_tail ("abc", "ab", false)  -> 0
 994    match_tail ("abc", "abc", false) -> 1
 995
 996    If FOLD_CASE is true, the comparison will be case-insensitive.  */
 997
 998 bool
 999 match_tail (const char *string, const char *tail, bool fold_case)
1000 {
1001   int i, j;
1002
1003   /* We want this to be fast, so we code two loops, one with
1004      case-folding, one without. */
1005
1006   if (!fold_case)
1007     {
1008       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
1009         if (string[i] != tail[j])
1010           break;
1011     }
1012   else
1013     {
1014       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
1015         if (c_tolower (string[i]) != c_tolower (tail[j]))
1016           break;
1017     }
1018
1019   /* If the tail was exhausted, the match was succesful.  */
1020   if (j == -1)
1021     return true;
1022   else
1023     return false;
1024 }
1025
1026 /* Checks whether string S matches each element of ACCEPTS.  A list
1027    element are matched either with fnmatch() or match_tail(),
1028    according to whether the element contains wildcards or not.
1029
1030    If the BACKWARD is false, don't do backward comparison -- just compare
1031    them normally.  */
1032 static bool
1033 in_acclist (const char *const *accepts, const char *s, bool backward)
1034 {
1035   for (; *accepts; accepts++)
1036     {
1037       if (has_wildcards_p (*accepts))
1038         {
1039           int res = opt.ignore_case
1040             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1041           /* fnmatch returns 0 if the pattern *does* match the string.  */
1042           if (res == 0)
1043             return true;
1044         }
1045       else
1046         {
1047           if (backward)
1048             {
1049               if (match_tail (s, *accepts, opt.ignore_case))
1050                 return true;
1051             }
1052           else
1053             {
1054               int cmp = opt.ignore_case
1055                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1056               if (cmp == 0)
1057                 return true;
1058             }
1059         }
1060     }
1061   return false;
1062 }
1063
/* Return a pointer to the suffix (file extension) of STR, i.e. to
   the character after the last `.' in its final path component, or
   NULL if that component contains no `.'.  The result points into
   STR itself; nothing is allocated.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  const char *dot = strrchr (str, '.');
  const char *slash;

  if (dot == NULL)
    return NULL;

  /* A `/' after the last `.' means the dot belongs to a directory
     name rather than to the file name.  */
  slash = strrchr (str, '/');
  if (slash != NULL && slash > dot)
    return NULL;

  return (char *) dot + 1;
}
1082
/* Return true if S contains at least one globbing wildcard
   character: `*', `?', `[' or `]'.  */

bool
has_wildcards_p (const char *s)
{
  /* strpbrk() finds the first character of S that is in the set.  */
  return strpbrk (s, "*?[]") != NULL;
}
1094
/* Return true if the suffix of FNAME, as found by suffix(), looks
   like an HTML extension.  The comparison is case-insensitive and
   accepts:

     html
     htm
     ?html (`?' stands for any single character)

   #### CAVEAT.  A matching suffix does not guarantee that FNAME
   really contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return (strcasecmp (ext, "html") == 0
          || strcasecmp (ext, "htm") == 0
          /* Any one character followed by "html".  */
          || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}
1120
/* Read one line from FP into freshly allocated storage and return
   it.  The storage comes from xmalloc()/xrealloc(); release it with
   xfree() when done.

   The line length is limited only by available memory.  The
   trailing newline, if one was read, is kept, and the result is
   zero-terminated.

   NULL is returned when end-of-file is hit before anything was
   read, and also on read error; use ferror() to tell the two
   apart.  */

char *
read_whole_line (FILE *fp)
{
  int capacity = 82;            /* bytes currently allocated */
  int used = 0;                 /* bytes of line read so far */
  char *buf = xmalloc (capacity);

  for (;;)
    {
      if (fgets (buf + used, capacity - used, fp) == NULL)
        break;

      used += strlen (buf + used);
      if (used == 0)
        /* Can happen with binary input where a line starts with
           \0; just try again.  */
        continue;

      if (buf[used - 1] == '\n')
        break;

      /* No newline yet: fgets() either filled the space it was
         given or hit EOF, so growing the buffer is always safe.  */
      capacity *= 2;
      buf = xrealloc (buf, capacity);
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buf);
      return NULL;
    }

  /* Give back the unused part of the buffer.  USED excludes the
     terminating \0, hence the `+ 1'; fgets() already wrote the
     terminator.  */
  if (used + 1 < capacity)
    buf = xrealloc (buf, used + 1);
  return buf;
}
1170 \f
1171 /* Read FILE into memory.  A pointer to `struct file_memory' are
1172    returned; use struct element `content' to access file contents, and
1173    the element `length' to know the file length.  `content' is *not*
1174    zero-terminated, and you should *not* read or write beyond the [0,
1175    length) range of characters.
1176
1177    After you are done with the file contents, call wget_read_file_free to
1178    release the memory.
1179
1180    Depending on the operating system and the type of file that is
1181    being read, wget_read_file() either mmap's the file into memory, or
1182    reads the file into the core using read().
1183
1184    If file is named "-", fileno(stdin) is used for reading instead.
1185    If you want to read from a real file named "-", use "./-" instead.  */
1186
1187 struct file_memory *
1188 wget_read_file (const char *file)
1189 {
1190   int fd;
1191   struct file_memory *fm;
1192   long size;
1193   bool inhibit_close = false;
1194
1195   /* Some magic in the finest tradition of Perl and its kin: if FILE
1196      is "-", just use stdin.  */
1197   if (HYPHENP (file))
1198     {
1199       fd = fileno (stdin);
1200       inhibit_close = true;
1201       /* Note that we don't inhibit mmap() in this case.  If stdin is
1202          redirected from a regular file, mmap() will still work.  */
1203     }
1204   else
1205     fd = open (file, O_RDONLY);
1206   if (fd < 0)
1207     return NULL;
1208   fm = xnew (struct file_memory);
1209
1210 #ifdef HAVE_MMAP
1211   {
1212     struct_fstat buf;
1213     if (fstat (fd, &buf) < 0)
1214       goto mmap_lose;
1215     fm->length = buf.st_size;
1216     /* NOTE: As far as I know, the callers of this function never
1217        modify the file text.  Relying on this would enable us to
1218        specify PROT_READ and MAP_SHARED for a marginal gain in
1219        efficiency, but at some cost to generality.  */
1220     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1221                         MAP_PRIVATE, fd, 0);
1222     if (fm->content == (char *)MAP_FAILED)
1223       goto mmap_lose;
1224     if (!inhibit_close)
1225       close (fd);
1226
1227     fm->mmap_p = 1;
1228     return fm;
1229   }
1230
1231  mmap_lose:
1232   /* The most common reason why mmap() fails is that FD does not point
1233      to a plain file.  However, it's also possible that mmap() doesn't
1234      work for a particular type of file.  Therefore, whenever mmap()
1235      fails, we just fall back to the regular method.  */
1236 #endif /* HAVE_MMAP */
1237
1238   fm->length = 0;
1239   size = 512;                   /* number of bytes fm->contents can
1240                                    hold at any given time. */
1241   fm->content = xmalloc (size);
1242   while (1)
1243     {
1244       wgint nread;
1245       if (fm->length > size / 2)
1246         {
1247           /* #### I'm not sure whether the whole exponential-growth
1248              thing makes sense with kernel read.  On Linux at least,
1249              read() refuses to read more than 4K from a file at a
1250              single chunk anyway.  But other Unixes might optimize it
1251              better, and it doesn't *hurt* anything, so I'm leaving
1252              it.  */
1253
1254           /* Normally, we grow SIZE exponentially to make the number
1255              of calls to read() and realloc() logarithmic in relation
1256              to file size.  However, read() can read an amount of data
1257              smaller than requested, and it would be unreasonable to
1258              double SIZE every time *something* was read.  Therefore,
1259              we double SIZE only when the length exceeds half of the
1260              entire allocated size.  */
1261           size <<= 1;
1262           fm->content = xrealloc (fm->content, size);
1263         }
1264       nread = read (fd, fm->content + fm->length, size - fm->length);
1265       if (nread > 0)
1266         /* Successful read. */
1267         fm->length += nread;
1268       else if (nread < 0)
1269         /* Error. */
1270         goto lose;
1271       else
1272         /* EOF */
1273         break;
1274     }
1275   if (!inhibit_close)
1276     close (fd);
1277   if (size > fm->length && fm->length != 0)
1278     /* Due to exponential growth of fm->content, the allocated region
1279        might be much larger than what is actually needed.  */
1280     fm->content = xrealloc (fm->content, fm->length);
1281   fm->mmap_p = 0;
1282   return fm;
1283
1284  lose:
1285   if (!inhibit_close)
1286     close (fd);
1287   xfree (fm->content);
1288   xfree (fm);
1289   return NULL;
1290 }
1291
1292 /* Release the resources held by FM.  Specifically, this calls
1293    munmap() or xfree() on fm->content, depending whether mmap or
1294    malloc/read were used to read in the file.  It also frees the
1295    memory needed to hold the FM structure itself.  */
1296
1297 void
1298 wget_read_file_free (struct file_memory *fm)
1299 {
1300 #ifdef HAVE_MMAP
1301   if (fm->mmap_p)
1302     {
1303       munmap (fm->content, fm->length);
1304     }
1305   else
1306 #endif
1307     {
1308       xfree (fm->content);
1309     }
1310   xfree (fm);
1311 }
1312 \f
/* Free every string of the NULL-terminated vector VEC, then free
   VEC itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **elt;

  if (vec == NULL)
    return;

  for (elt = vec; *elt != NULL; elt++)
    xfree (*elt);
  xfree (vec);
}
1326
/* Append the NULL-terminated vector V2 to the NULL-terminated
   vector V1 and return the merged vector.

   V1 is reallocated and V2 is freed, so neither pointer may be used
   after the call; use the return value instead.  The strings that
   V2 pointed to are not copied: their pointers are moved into the
   result.  If V1 is NULL, V2 is returned as is; if V2 is NULL, V1
   is returned as is.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 has no elements: nothing to append, just dispose of it.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the NULL.  The
     element type is `char *', so size by the pointed-to element
     rather than by a spelled-out type.  */
  v1 = xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy V2's elements, including its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1357
/* Append a newly allocated copy of STR to the NULL-terminated
   vector VEC and return the (possibly moved) vector.  VEC may be
   NULL, in which case a one-element vector is created.  */

char **
vec_append (char **vec, const char *str)
{
  int old_len = 0;              /* number of strings already in VEC */

  if (vec != NULL)
    while (vec[old_len] != NULL)
      ++old_len;

  /* Make room for the old entries, the new one, and the NULL.  */
  vec = xrealloc (vec, (old_len + 2) * sizeof (char *));
  vec[old_len] = xstrdup (str);
  vec[old_len + 1] = NULL;
  return vec;
}
1381 \f
1382 /* Sometimes it's useful to create "sets" of strings, i.e. special
1383    hash tables where you want to store strings as keys and merely
1384    query for their existence.  Here is a set of utility routines that
1385    makes that transparent.  */
1386
/* Add string S to the string set HT.  If S is already a member,
   nothing happens; otherwise a copy of S made with xstrdup() is
   stored as the key.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Checking first avoids having to free an existing key and
     duplicate the string again.

     The stored value is the literal "1": an arbitrary marker that
     takes no extra memory, since every entry of every set points to
     the same literal.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1402
/* Return the result of hash_table_contains() for S in the string
   set HT; this is merely a set-flavored name for that call.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);

  return found;
}
1410
1411 /* Convert the specified string set to array.  ARRAY should be large
1412    enough to hold hash_table_count(ht) char pointers.  */
1413
1414 void string_set_to_array (struct hash_table *ht, char **array)
1415 {
1416   hash_table_iterator iter;
1417   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1418     *array++ = iter.key;
1419 }
1420
1421 /* Free the string set.  This frees both the storage allocated for
1422    keys and the actual hash table.  (hash_table_destroy would only
1423    destroy the hash table.)  */
1424
1425 void
1426 string_set_free (struct hash_table *ht)
1427 {
1428   hash_table_iterator iter;
1429   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1430     xfree (iter.key);
1431   hash_table_destroy (ht);
1432 }
1433
1434 /* Utility function: simply call xfree() on all keys and values of HT.  */
1435
1436 void
1437 free_keys_and_values (struct hash_table *ht)
1438 {
1439   hash_table_iterator iter;
1440   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1441     {
1442       xfree (iter.key);
1443       xfree (iter.value);
1444     }
1445 }
1446 \f
/* Fetch the digit grouping data used for thousand separators: the
   separator string is stored in *SEP and the grouping description
   in *GROUPING.  The values are obtained from localeconv() on the
   first call and cached for all later calls.

   In locales that set no thousand separator (such as the "C"
   locale) a separator is forced anyway: "," normally, or "." when
   "," is the decimal point, with groups of three digits.  Thousand
   separators are shown in only one place, so this should not be a
   problem in practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static bool initialized;
  static const char *cached_sep;
  static const char *cached_grouping;

  if (!initialized)
    {
      /* Start from what the locale provides.  */
      const struct lconv *lc = localeconv ();
      const char *s = lc->thousands_sep;
      const char *g = lc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths cannot be counted here, so a multi-byte
         separator is discarded; the check below then picks a
         single-byte replacement.  */
      if (strlen (s) > 1)
        s = "";
#endif
      if (*s == '\0')
        {
          /* No usable separator: fall back to ',' (or '.' when ','
             already serves as the decimal point) and to groups of
             three digits, for legibility.  */
          s = (*lc->decimal_point == ',') ? "." : ",";
          g = "\x03";
        }

      cached_sep = s;
      cached_grouping = g;
      initialized = true;
    }

  *sep = cached_sep;
  *grouping = cached_grouping;
}
1492
1493 /* Return a printed representation of N with thousand separators.
1494    This should respect locale settings, with the exception of the "C"
1495    locale which mandates no separator, but we use one anyway.
1496
1497    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1498    the separators because it's too non-portable, and it's hard to test
1499    for this feature at configure time.  Besides, it wouldn't display
1500    separators in the "C" locale, still used by many Unix users.  */
1501
1502 const char *
1503 with_thousand_seps (wgint n)
1504 {
1505   static char outbuf[48];
1506   char *p = outbuf + sizeof outbuf;
1507
1508   /* Info received from locale */
1509   const char *grouping, *sep;
1510   int seplen;
1511
1512   /* State information */
1513   int i = 0, groupsize;
1514   const char *atgroup;
1515
1516   bool negative = n < 0;
1517
1518   /* Initialize grouping data. */
1519   get_grouping_data (&sep, &grouping);
1520   seplen = strlen (sep);
1521   atgroup = grouping;
1522   groupsize = *atgroup++;
1523
1524   /* This would overflow on WGINT_MIN, but printing negative numbers
1525      is not an important goal of this fuinction.  */
1526   if (negative)
1527     n = -n;
1528
1529   /* Write the number into the buffer, backwards, inserting the
1530      separators as necessary.  */
1531   *--p = '\0';
1532   while (1)
1533     {
1534       *--p = n % 10 + '0';
1535       n /= 10;
1536       if (n == 0)
1537         break;
1538       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1539       if (++i == groupsize)
1540         {
1541           if (seplen == 1)
1542             *--p = *sep;
1543           else
1544             memcpy (p -= seplen, sep, seplen);
1545           i = 0;
1546           if (*atgroup)
1547             groupsize = *atgroup++;
1548         }
1549     }
1550   if (negative)
1551     *--p = '-';
1552
1553   return p;
1554 }
1555
1556 /* N, a byte quantity, is converted to a human-readable abberviated
1557    form a la sizes printed by `ls -lh'.  The result is written to a
1558    static buffer, a pointer to which is returned.
1559
1560    Unlike `with_thousand_seps', this approximates to the nearest unit.
1561    Quoting GNU libit: "Most people visually process strings of 3-4
1562    digits effectively, but longer strings of digits are more prone to
1563    misinterpretation.  Hence, converting to an abbreviated form
1564    usually improves readability."
1565
1566    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1567    original computer-related meaning of "powers of 1024".  We don't
1568    use the "*bibyte" names invented in 1998, and seldom used in
1569    practice.  Wikipedia's entry on "binary prefix" discusses this in
1570    some detail.  */
1571
1572 char *
1573 human_readable (HR_NUMTYPE n)
1574 {
1575   /* These suffixes are compatible with those of GNU `ls -lh'. */
1576   static char powers[] =
1577     {
1578       'K',                      /* kilobyte, 2^10 bytes */
1579       'M',                      /* megabyte, 2^20 bytes */
1580       'G',                      /* gigabyte, 2^30 bytes */
1581       'T',                      /* terabyte, 2^40 bytes */
1582       'P',                      /* petabyte, 2^50 bytes */
1583       'E',                      /* exabyte,  2^60 bytes */
1584     };
1585   static char buf[8];
1586   size_t i;
1587
1588   /* If the quantity is smaller than 1K, just print it. */
1589   if (n < 1024)
1590     {
1591       snprintf (buf, sizeof (buf), "%d", (int) n);
1592       return buf;
1593     }
1594
1595   /* Loop over powers, dividing N with 1024 in each iteration.  This
1596      works unchanged for all sizes of wgint, while still avoiding
1597      non-portable `long double' arithmetic.  */
1598   for (i = 0; i < countof (powers); i++)
1599     {
1600       /* At each iteration N is greater than the *subsequent* power.
1601          That way N/1024.0 produces a decimal number in the units of
1602          *this* power.  */
1603       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1604         {
1605           double val = n / 1024.0;
1606           /* Print values smaller than 10 with one decimal digits, and
1607              others without any decimals.  */
1608           snprintf (buf, sizeof (buf), "%.*f%c",
1609                     val < 10 ? 1 : 0, val, powers[i]);
1610           return buf;
1611         }
1612       n /= 1024;
1613     }
1614   return NULL;                  /* unreached */
1615 }
1616
1617 /* Count the digits in the provided number.  Used to allocate space
1618    when printing numbers.  */
1619
1620 int
1621 numdigit (wgint number)
1622 {
1623   int cnt = 1;
1624   if (number < 0)
1625     ++cnt;                      /* accomodate '-' */
1626   while ((number /= 10) != 0)
1627     ++cnt;
1628   return cnt;
1629 }
1630
1631 #define PR(mask) *p++ = n / (mask) + '0'
1632
1633 /* DIGITS_<D> is used to print a D-digit number and should be called
1634    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1635    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1636    Recursively this continues until DIGITS_1 is invoked.  */
1637
1638 #define DIGITS_1(mask) PR (mask)
1639 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1640 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1641 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1642 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1643 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1644 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1645 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1646 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1647 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1648
1649 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1650
1651 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1652 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1653 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1654 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1655 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1656 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1657 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1658 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1659 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1660
1661 /* Shorthand for casting to wgint. */
1662 #define W wgint
1663
1664 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1665    `sprintf(buffer, "%lld", (long long) number)', only typically much
1666    faster and portable to machines without long long.
1667
1668    The speedup may make a difference in programs that frequently
1669    convert numbers to strings.  Some implementations of sprintf,
1670    particularly the one in some versions of GNU libc, have been known
1671    to be quite slow when converting integers to strings.
1672
1673    Return the pointer to the location where the terminating zero was
1674    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1675    function is done.)
1676
1677    BUFFER should be large enough to accept as many bytes as you expect
1678    the number to take up.  On machines with 64-bit wgints the maximum
1679    needed size is 24 bytes.  That includes the digits needed for the
1680    largest 64-bit number, the `-' sign in case it's negative, and the
1681    terminating '\0'.  */
1682
1683 char *
1684 number_to_string (char *buffer, wgint number)
1685 {
1686   char *p = buffer;
1687   wgint n = number;
1688
1689   int last_digit_char = 0;
1690
1691 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1692   /* We are running in a very strange environment.  Leave the correct
1693      printing to sprintf.  */
1694   p += sprintf (buf, "%j", (intmax_t) (n));
1695 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1696
1697   if (n < 0)
1698     {
1699       if (n < -WGINT_MAX)
1700         {
1701           /* n = -n would overflow because -n would evaluate to a
1702              wgint value larger than WGINT_MAX.  Need to make n
1703              smaller and handle the last digit separately.  */
1704           int last_digit = n % 10;
1705           /* The sign of n%10 is implementation-defined. */
1706           if (last_digit < 0)
1707             last_digit_char = '0' - last_digit;
1708           else
1709             last_digit_char = '0' + last_digit;
1710           /* After n is made smaller, -n will not overflow. */
1711           n /= 10;
1712         }
1713
1714       *p++ = '-';
1715       n = -n;
1716     }
1717
1718   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1719      way printing any N is fully open-coded without a loop or jump.
1720      (Also see description of DIGITS_*.)  */
1721
1722   if      (n < 10)                       DIGITS_1 (1);
1723   else if (n < 100)                      DIGITS_2 (10);
1724   else if (n < 1000)                     DIGITS_3 (100);
1725   else if (n < 10000)                    DIGITS_4 (1000);
1726   else if (n < 100000)                   DIGITS_5 (10000);
1727   else if (n < 1000000)                  DIGITS_6 (100000);
1728   else if (n < 10000000)                 DIGITS_7 (1000000);
1729   else if (n < 100000000)                DIGITS_8 (10000000);
1730   else if (n < 1000000000)               DIGITS_9 (100000000);
1731 #if SIZEOF_WGINT == 4
1732   /* wgint is 32 bits wide: no number has more than 10 digits. */
1733   else                                   DIGITS_10 (1000000000);
1734 #else
1735   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1736      Constants are constructed by compile-time multiplication to avoid
1737      dealing with different notations for 64-bit constants
1738      (nL/nLL/nI64, depending on the compiler and architecture).  */
1739   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1740   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1741   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1742   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1743   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1744   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1745   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1746   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1747   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1748   else                                   DIGITS_19 (1000000000*(W)1000000000);
1749 #endif
1750
1751   if (last_digit_char)
1752     *p++ = last_digit_char;
1753
1754   *p = '\0';
1755 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1756
1757   return p;
1758 }
1759
1760 #undef PR
1761 #undef W
1762 #undef SPRINTF_WGINT
1763 #undef DIGITS_1
1764 #undef DIGITS_2
1765 #undef DIGITS_3
1766 #undef DIGITS_4
1767 #undef DIGITS_5
1768 #undef DIGITS_6
1769 #undef DIGITS_7
1770 #undef DIGITS_8
1771 #undef DIGITS_9
1772 #undef DIGITS_10
1773 #undef DIGITS_11
1774 #undef DIGITS_12
1775 #undef DIGITS_13
1776 #undef DIGITS_14
1777 #undef DIGITS_15
1778 #undef DIGITS_16
1779 #undef DIGITS_17
1780 #undef DIGITS_18
1781 #undef DIGITS_19
1782
1783 #define RING_SIZE 3
1784
1785 /* Print NUMBER to a statically allocated string and return a pointer
1786    to the printed representation.
1787
1788    This function is intended to be used in conjunction with printf.
1789    It is hard to portably print wgint values:
1790     a) you cannot use printf("%ld", number) because wgint can be long
1791        long on 32-bit machines with LFS.
1792     b) you cannot use printf("%lld", number) because NUMBER could be
1793        long on 32-bit machines without LFS, or on 64-bit machines,
1794        which do not require LFS.  Also, Windows doesn't support %lld.
1795     c) you cannot use printf("%j", (int_max_t) number) because not all
1796        versions of printf support "%j", the most notable being the one
1797        on Windows.
1798     d) you cannot #define WGINT_FMT to the appropriate format and use
1799        printf(WGINT_FMT, number) because that would break translations
1800        for user-visible messages, such as printf("Downloaded: %d
1801        bytes\n", number).
1802
1803    What you should use instead is printf("%s", number_to_static_string
1804    (number)).
1805
1806    CAVEAT: since the function returns pointers to static data, you
1807    must be careful to copy its result before calling it again.
1808    However, to make it more useful with printf, the function maintains
1809    an internal ring of static buffers to return.  That way things like
1810    printf("%s %s", number_to_static_string (num1),
1811    number_to_static_string (num2)) work as expected.  Three buffers
1812    are currently used, which means that "%s %s %s" will work, but "%s
1813    %s %s %s" won't.  If you need to print more than three wgints,
1814    bump the RING_SIZE (or rethink your message.)  */
1815
1816 char *
1817 number_to_static_string (wgint number)
1818 {
1819   static char ring[RING_SIZE][24];
1820   static int ringpos;
1821   char *buf = ring[ringpos];
1822   number_to_string (buf, number);
1823   ringpos = (ringpos + 1) % RING_SIZE;
1824   return buf;
1825 }
1826
1827 /* Converts the byte to bits format if --bits option is enabled
1828  */
1829 wgint
1830 convert_to_bits (wgint num)
1831 {
1832   if (opt.bits_fmt)
1833     return num * 8;
1834   return num;
1835 }
1836
1837 \f
/* Report the width, in columns, of the terminal attached to stderr.
   Returns 0 whenever the width cannot be determined.  */

int
determine_screen_width (void)
{
  /* No POSIX tcgetattr()-based way of querying the terminal size is
     known here, hence the platform-specific branches.  */
#ifdef TIOCGWINSZ
  struct winsize ws;

  /* NOTE(review): a non-NULL opt.lfilename presumably means output
     goes to a log file rather than a terminal -- confirm.  */
  if (opt.lfilename != NULL)
    return 0;

  /* A failing ioctl is most likely ENOTTY, i.e. stderr is not a
     terminal.  */
  if (ioctl (fileno (stderr), TIOCGWINSZ, &ws) < 0)
    return 0;

  return ws.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO info;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &info))
    return info.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1867 \f
/* Nonzero once the random number generator (either rand or
   [dl]rand48) has been seeded.  */
static int rnd_seeded;

/* Return a random number between 0 and MAX-1, inclusive.

   MAX is expected to be positive.  For MAX <= 0 there is no valid
   range to draw from, so 0 is returned; this also keeps the lrand48
   branch from computing a remainder by zero.

   If the system does not support lrand48, the result is obtained by
   scaling rand(), so at most RAND_MAX+1 distinct values can occur
   even when MAX is larger than that.

   The random number generator is seeded automatically the first time
   it is needed.

   This uses lrand48 where available, rand elsewhere.  DO NOT use it
   for cryptography.  It is only meant to be used in situations where
   quality of the random numbers returned doesn't really matter.  */

int
random_number (int max)
{
  if (max <= 0)
    return 0;

#ifdef HAVE_DRAND48
  if (!rnd_seeded)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */
  if (!rnd_seeded)
    {
      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
      rnd_seeded = 1;
    }

  /* Like rand() % max, but uses the high-order bits for better
     randomness on architectures where rand() is implemented using a
     simple congruential generator.  */
  return (int) ((double) max * rand () / (RAND_MAX + 1.0));
#endif /* not HAVE_DRAND48 */
}
1914
/* Produce a uniformly distributed random floating point number in
   the [0, 1) range.  drand48 does the work where it is available;
   elsewhere the value is pieced together from random_number calls,
   which is admittedly a lame kludge.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      rnd_seeded = 1;
      srand48 ((long) time (NULL) ^ (long) getpid ());
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Assemble the fraction four decimal digits at a time.  */
  const double step = 10000.0;
  double sum;

  sum = random_number (10000) / step;
  sum += random_number (10000) / (step * step);
  sum += random_number (10000) / (step * step * step);
  sum += random_number (10000) / (step * step * step * step);
  return sum;
#endif /* not HAVE_DRAND48 */
}
1936 \f
1937 /* Implementation of run_with_timeout, a generic timeout-forcing
1938    routine for systems with Unix-like signal handling.  */
1939
1940 #ifdef USE_SIGNAL_TIMEOUT
1941 # ifdef HAVE_SIGSETJMP
1942 #  define SETJMP(env) sigsetjmp (env, 1)
1943
1944 static sigjmp_buf run_with_timeout_env;
1945
1946 static void
1947 abort_run_with_timeout (int sig)
1948 {
1949   assert (sig == SIGALRM);
1950   siglongjmp (run_with_timeout_env, -1);
1951 }
1952 # else /* not HAVE_SIGSETJMP */
1953 #  define SETJMP(env) setjmp (env)
1954
1955 static jmp_buf run_with_timeout_env;
1956
1957 static void
1958 abort_run_with_timeout (int sig)
1959 {
1960   assert (sig == SIGALRM);
1961   /* We don't have siglongjmp to preserve the set of blocked signals;
1962      if we longjumped out of the handler at this point, SIGALRM would
1963      remain blocked.  We must unblock it manually. */
1964   sigset_t set;
1965   sigemptyset (&set);
1966   sigaddset (&set, SIGALRM);
1967   sigprocmask (SIG_BLOCK, &set, NULL);
1968
1969   /* Now it's safe to longjump. */
1970   longjmp (run_with_timeout_env, -1);
1971 }
1972 # endif /* not HAVE_SIGSETJMP */
1973
/* Schedule delivery of SIGALRM TIMEOUT seconds from now, through
   setitimer where ITIMER_REAL is available and through alarm
   otherwise.

   TIMEOUT should be non-zero.  A value so small that it would round
   down to zero is bumped to the smallest legal interval instead (1us
   for setitimer, 1s for alarm), so that SIGALRM is delivered in all
   cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval timer;
  long whole_secs = (long) timeout;

  xzero (timer);
  timer.it_value.tv_sec = whole_secs;
  timer.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* An all-zero value would mean "wait forever", so make sure we
     wait for at least the minimum interval.  */
  if (timer.it_value.tv_sec == 0 && timer.it_value.tv_usec == 0)
    timer.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &timer, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  alarm(0) means "never deliver
     the alarm", i.e. "wait forever", which is not what someone who
     specifies a 0.5s timeout would expect; round such timeouts up to
     one second.  */
  int secs = (int) timeout;

  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
2008
/* Disarm whatever alarm_set scheduled: an all-zero itimerval for
   setitimer, or alarm(0) on systems without ITIMER_REAL.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  struct itimerval zero_timer;

  xzero (zero_timer);
  setitimer (ITIMER_REAL, &zero_timer, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2022
2023 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2024    seconds.  Returns true if the function was interrupted with a
2025    timeout, false otherwise.
2026
2027    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2028    using setitimer() or alarm().  The timeout is enforced by
2029    longjumping out of the SIGALRM handler.  This has several
2030    advantages compared to the traditional approach of relying on
2031    signals causing system calls to exit with EINTR:
2032
2033      * The callback function is *forcibly* interrupted after the
2034        timeout expires, (almost) regardless of what it was doing and
2035        whether it was in a syscall.  For example, a calculation that
2036        takes a long time is interrupted as reliably as an IO
2037        operation.
2038
2039      * It works with both SYSV and BSD signals because it doesn't
2040        depend on the default setting of SA_RESTART.
2041
2042      * It doesn't require special handler setup beyond a simple call
2043        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2044        optional.)
2045
2046    The only downside is that, if FUN allocates internal resources that
2047    are normally freed prior to exit from the functions, they will be
2048    lost in case of timeout.  */
2049
2050 bool
2051 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2052 {
2053   int saved_errno;
2054
2055   if (timeout == 0)
2056     {
2057       fun (arg);
2058       return false;
2059     }
2060
2061   signal (SIGALRM, abort_run_with_timeout);
2062   if (SETJMP (run_with_timeout_env) != 0)
2063     {
2064       /* Longjumped out of FUN with a timeout. */
2065       signal (SIGALRM, SIG_DFL);
2066       return true;
2067     }
2068   alarm_set (timeout);
2069   fun (arg);
2070
2071   /* Preserve errno in case alarm() or signal() modifies it. */
2072   saved_errno = errno;
2073   alarm_cancel ();
2074   signal (SIGALRM, SIG_DFL);
2075   errno = saved_errno;
2076
2077   return false;
2078 }
2079
2080 #else  /* not USE_SIGNAL_TIMEOUT */
2081
2082 #ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based
   timeouts: TIMEOUT is ignored, FUN(ARG) is called directly and the
   result is always false ("not interrupted").  It is not defined
   under Windows, because Windows has its own version of
   run_with_timeout that uses threads.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;               /* no timeout support in this build */
  fun (arg);
  return false;
}
2093 #endif /* not WINDOWS */
2094 #endif /* not USE_SIGNAL_TIMEOUT */
2095 \f
2096 #ifndef WINDOWS
2097
/* Suspend execution for SECONDS seconds; fractional values are
   honored.  On machines without nanosleep(), the sleep may end early
   if it is interrupted by a signal.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface because it offers high
     accuracy and, more importantly, because it lets us reliably
     resume sleeping after receiving a signal such as SIGWINCH.
     (There was an actual Debian bug report about --limit-rate
     malfunctioning while the terminal was being resized.)  */
  struct timespec request, leftover;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);
  for (;;)
    {
      if (nanosleep (&request, &leftover) >= 0)
        break;
      if (errno != EINTR)
        break;
      /* Interrupted by a signal: go back to sleep for whatever time
         is left.  */
      request = leftover;
    }
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  On some
     systems usleep cannot handle values larger than 1,000,000, so
     whole seconds are slept with sleep first and usleep only covers
     the subsecond remainder.  */
  if (seconds >= 1)
    {
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval period;

  period.tv_sec = (long) seconds;
  period.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &period);
  /* If select returns -1 and errno is EINTR, we were interrupted by
     a signal.  Without knowing how long we've actually slept we
     can't go back to sleep, and tracking that with gettimeofday is
     slow and unreliable due to clock skew.  */
#endif
}
2143
2144 #endif /* not WINDOWS */
2145
/* Encode the octets in DATA of length LENGTH to base64 format,
   storing the result to DEST.  The output will be zero-terminated,
   and must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes.  The function returns the length of
   the resulting base64 data, not counting the terminating zero.  A
   LENGTH of zero (or less) yields the empty string.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  char *p = dest;
  /* Number of complete 3-octet groups.  Counting groups, rather than
     comparing against a "DATA + LENGTH - 2" end pointer, avoids
     forming a pointer before the start of DATA when LENGTH < 2.  */
  int groups = length / 3;
  int i;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (i = 0; i < groups; i++, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two leftover octets, padding with '='...  */
  switch (length % 3)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* Nothing left over; no padding needed.  */
      break;
    }
  /* ...and zero-terminate the result.  */
  *p = '\0';

  return p - dest;
}
2202
2203 /* Store in C the next non-whitespace character from the string, or \0
2204    when end of string is reached.  */
2205 #define NEXT_CHAR(c, p) do {                    \
2206   c = (unsigned char) *p++;                     \
2207 } while (c_isspace (c))
2208
2209 #define IS_ASCII(c) (((c) & 0x80) == 0)
2210
/* Decode BASE64, a null-terminated base64 string, storing the
   resulting octets in the memory pointed to by DEST.  DEST is assumed
   to be large enough to accommodate the decoded data, which is
   guaranteed to be no more than 3/4*strlen(base64) bytes.

   Since DEST receives binary data, it is not NUL-terminated.  The
   function returns the number of bytes written to DEST, or -1 when
   the base64 input is malformed.

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* Value of each base64 digit among the first 128 characters; -1
     marks characters that are not digits ('=' and NUL included).
     Note that this assumes ASCII (but so does Wget in other
     places).  */
  static const signed char digit_values[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
/* Value of base64 digit C, or -1 if C is not a base64 digit.  */
#define B64_DIGIT_VALUE(c) (IS_ASCII (c) ? (int) digit_values[c] : -1)

  const char *in = base64;
  char *out = dest;

  for (;;)
    {
      unsigned char c;
      unsigned long group;
      int v;

      /* First digit of a quadruplet; end of input is legal only
         here.  */
      NEXT_CHAR (c, in);
      if (c == '\0')
        break;
      v = B64_DIGIT_VALUE (c);
      if (v < 0)
        return -1;              /* illegal char (or stray `=') */
      group = v << 18;

      /* Second digit; together with the first it yields one octet.
         A premature NUL has no digit value and is rejected too.  */
      NEXT_CHAR (c, in);
      v = B64_DIGIT_VALUE (c);
      if (v < 0)
        return -1;              /* premature EOF or illegal char */
      group |= v << 12;
      *out++ = group >> 16;

      /* Third digit, or the first of two padding characters.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        {
          NEXT_CHAR (c, in);
          if (c != '=')
            return -1;          /* premature EOF or missing `=' */
          continue;
        }
      v = B64_DIGIT_VALUE (c);
      if (v < 0)
        return -1;              /* premature EOF or illegal char */
      group |= v << 6;
      *out++ = 0xff & group >> 8;

      /* Fourth digit, or a single padding character.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        continue;
      v = B64_DIGIT_VALUE (c);
      if (v < 0)
        return -1;              /* premature EOF or illegal char */
      group |= v;
      *out++ = 0xff & group;
    }
#undef B64_DIGIT_VALUE

  return out - (char *) dest;
}
2309
2310 #undef IS_ASCII
2311 #undef NEXT_CHAR
2312 \f
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements FROM..TO (both inclusive) of the array BASE,
   whose elements are SIZE bytes each, using TEMP as scratch space of
   the same layout.  CMPFUN follows the qsort convention.  Elements
   that compare equal keep their relative order.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written as from + half the distance so that the midpoint
         cannot overflow.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      /* Merge the two sorted halves into TEMP.  Taking the left
         element on ties (<= 0) is what makes the sort stable.  */
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* Copy the merged run back into place.  */
      for (k = from; k <= to; k++)
        memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage with
   alloca.

   Arrays with fewer than two elements (or zero-sized elements) are
   already sorted and are left alone; in particular NMEMB == 0 must
   not reach mergesort_internal, where NMEMB - 1 would wrap around.
   The scratch buffer is exactly NMEMB * SIZE bytes.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2358 \f
/* Format NUMBER compactly and return a pointer to a static buffer
   holding the text (overwritten by the next call).  Numbers that
   round to ten or more are printed as integers; smaller ones keep a
   single significant digit, without trailing zeros and with no more
   than three fractional digits.  For example, 0.1 is printed as
   "0.1", 0.035 as "0.04", 0.0091 as "0.009", and 0.0003 as simply
   "0".

   This is useful for displaying durations because it provides
   order-of-magnitude information without unnecessary clutter --
   long-running downloads are shown without the fractional part, and
   short ones still retain one significant digit.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  double magnitude = number >= 0 ? number : -number;
  const char *fmt;

  /* The integer cutoff is 9.95 because %.1f would render 9.96 as
     "10.0" instead of "10", while 9.94 still prints as "9.9".
     Magnitudes in [0.0005, 0.001) are rounded to 0.001 by %.3f, and
     anything closer to zero gets no format at all.  */
  fmt = (magnitude >= 9.95 ? "%.0f"
         : magnitude >= 0.95 ? "%.1f"
         : magnitude >= 0.001 ? "%.1g"
         : magnitude >= 0.0005 ? "%.3f"
         : NULL);

  if (fmt != NULL)
    snprintf (buf, sizeof buf, fmt, number);
  else
    /* print numbers close to 0 as 0, not 0.000 */
    strcpy (buf, "0");

  return buf;
}
2393
2394 #ifdef TESTING
2395
/* Unit test for subdir_p: run it over a small table of directory
   pairs and compare against the expected answers.  Returns NULL when
   every case passes; a failing mu_assert is what reports the
   message.  */
const char *
test_subdir_p()
{
  struct {
    char *d1;
    char *d2;
    bool result;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  int idx = 0;

  while (idx < countof(cases))
    {
      bool got = subdir_p (cases[idx].d1, cases[idx].d2);

      mu_assert ("test_subdir_p: wrong result",
                 got == cases[idx].result);
      ++idx;
    }

  return NULL;
}
2420
/* Unit test for dir_matches_p: each table entry pairs a
   NULL-terminated list of directory patterns with a directory name
   and the expected verdict.  Returns NULL when every case passes; a
   failing mu_assert is what reports the message.  */
const char *
test_dir_matches_p()
{
  struct {
    char *dirlist[3];
    char *dir;
    bool result;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  int idx = 0;

  while (idx < countof(cases))
    {
      bool got = dir_matches_p (cases[idx].dirlist, cases[idx].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == cases[idx].result);
      ++idx;
    }

  return NULL;
}
2457
2458 #endif /* TESTING */
2459