sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #include <errno.h>
  46 #include <fcntl.h>
  47 #include <assert.h>
  48 #include <stdarg.h>
  49 #include <locale.h>
  50
  51 #if HAVE_UTIME
  52 # include <sys/types.h>
  53 # ifdef HAVE_UTIME_H
  54 #  include <utime.h>
  55 # endif
  56
  57 # ifdef HAVE_SYS_UTIME_H
  58 #  include <sys/utime.h>
  59 # endif
  60 #endif
  61
  62 #include <sys/time.h>
  63
  64 #include <sys/stat.h>
  65
  66 /* For TIOCGWINSZ and friends: */
  67 #include <sys/ioctl.h>
  68 #ifdef HAVE_TERMIOS_H
  69 # include <termios.h>
  70 #endif
  71
  72 /* Needed for Unix version of run_with_timeout. */
  73 #include <signal.h>
  74 #include <setjmp.h>
  75
  76 #include <regex.h>
  77 #ifdef HAVE_LIBPCRE
  78 # include <pcre.h>
  79 #endif
  80
  81 #ifndef HAVE_SIGSETJMP
  82 /* If sigsetjmp is a macro, configure won't pick it up. */
  83 # ifdef sigsetjmp
  84 #  define HAVE_SIGSETJMP
  85 # endif
  86 #endif
  87
  88 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  89 # define USE_SIGNAL_TIMEOUT
  90 #endif
  91
  92 #include "utils.h"
  93 #include "hash.h"
  94
  95 #ifdef __VMS
  96 #include "vms.h"
  97 #endif /* def __VMS */
  98
  99 #ifdef TESTING
 100 #include "test.h"
 101 #endif
 102
 103 static void
 104 memfatal (const char *context, long attempted_size)
 105 {
 106   /* Make sure we don't try to store part of the log line, and thus
 107      call malloc.  */
 108   log_set_save_context (false);
 109
 110   /* We have different log outputs in different situations:
 111      1) output without bytes information
 112      2) output with bytes information  */
 113   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 114     {
 115       logprintf (LOG_ALWAYS,
 116                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 117                  exec_name, context);
 118     }
 119   else
 120     {
 121       logprintf (LOG_ALWAYS,
 122                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 123                  exec_name, context, attempted_size);
 124     }
 125
 126   exit (1);
 127 }
 128
 129 /* Character property table for (re-)escaping VMS ODS5 extended file
 130    names.  Note that this table ignores Unicode.
 131
 132    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 133
 134    ODS5 Invalid characters:
 135       C0 control codes (0x00 to 0x1F inclusive)
 136       Asterisk (*)
 137       Question mark (?)
 138
 139    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 140       Double quotation marks (")
 141       Backslash (\)
 142       Colon (:)
 143       Left angle bracket (<)
 144       Right angle bracket (>)
 145       Slash (/)
 146       Vertical bar (|)
 147
 148    Characters escaped by "^":
 149       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 150        @  [  \  ]  ^  `  {  |  }  ~
 151
 152    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 153    case.  Note that un-escaped < and > can also confuse a directory
 154    spec.
 155
 156    Characters put out as ^xx:
 157       7F (DEL)
 158       80-9F (C1 control characters)
 159       A0 (nonbreaking space)
 160       FF (Latin small letter y diaeresis)
 161
 162    Other cases:
 163       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 164
 165     Property table values:
 166       Normal escape:    1
 167       Space:            2
 168       Dot:              4
 169       Hex-hex escape:   8
 170       ODS2 normal:     16
 171       ODS2 lower case: 32
 172       Hex digit:       64
 173 */
 174
 175 unsigned char char_prop[ 256] = {
 176
 177 /* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
 178     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 179
 180 /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
 181     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 182
 183 /*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
 184     2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,
 185
 186 /*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
 187    80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,
 188
 189 /*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
 190     1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,
 191
 192 /*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
 193    16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,
 194
 195 /*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
 196     1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,
 197
 198 /*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
 199    32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,
 200
 201     8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
 202     8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
 203     8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 204     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 205     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 206     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 207     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 208     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
 209 };
 210
 211 /* Utility function: like xstrdup(), but also lowercases S.  */
 212
 213 char *
 214 xstrdup_lower (const char *s)
 215 {
 216   char *copy = xstrdup (s);
 217   char *p = copy;
 218   for (; *p; p++)
 219     *p = c_tolower (*p);
 220   return copy;
 221 }
 222
 223 /* Copy the string formed by two pointers (one on the beginning, other
 224    on the char after the last char) to a new, malloc-ed location.
 225    0-terminate it.  */
 226 char *
 227 strdupdelim (const char *beg, const char *end)
 228 {
 229   char *res = xmalloc (end - beg + 1);
 230   memcpy (res, beg, end - beg);
 231   res[end - beg] = '\0';
 232   return res;
 233 }
 234
 235 /* Parse a string containing comma-separated elements, and return a
 236    vector of char pointers with the elements.  Spaces following the
 237    commas are ignored.  */
 238 char **
 239 sepstring (const char *s)
 240 {
 241   char **res;
 242   const char *p;
 243   int i = 0;
 244
 245   if (!s || !*s)
 246     return NULL;
 247   res = NULL;
 248   p = s;
 249   while (*s)
 250     {
 251       if (*s == ',')
 252         {
 253           res = xrealloc (res, (i + 2) * sizeof (char *));
 254           res[i] = strdupdelim (p, s);
 255           res[++i] = NULL;
 256           ++s;
 257           /* Skip the blanks following the ','.  */
 258           while (c_isspace (*s))
 259             ++s;
 260           p = s;
 261         }
 262       else
 263         ++s;
 264     }
 265   res = xrealloc (res, (i + 2) * sizeof (char *));
 266   res[i] = strdupdelim (p, s);
 267   res[i + 1] = NULL;
 268   return res;
 269 }
 270 \f
 271 /* Like sprintf, but prints into a string of sufficient size freshly
 272    allocated with malloc, which is returned.  If unable to print due
 273    to invalid format, returns NULL.  Inability to allocate needed
 274    memory results in abort, as with xmalloc.  This is in spirit
 275    similar to the GNU/BSD extension asprintf, but somewhat easier to
 276    use.
 277
 278    Internally the function either calls vasprintf or loops around
 279    vsnprintf until the correct size is found.  Since Wget also ships a
 280    fallback implementation of vsnprintf, this should be portable.  */
 281
/* Upper limit on the size of the text buffer that aprintf will
   allocate.  It applies when vasprintf is not available on the system
   and vsnprintf returns -1 when a long line is truncated (as in old
   versions of glibc and on other systems without C99 support).  */
 286
 287 #define FMT_MAX_LENGTH 1048576
 288
 289 char *
 290 aprintf (const char *fmt, ...)
 291 {
 292 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 293   /* Use vasprintf. */
 294   int ret;
 295   va_list args;
 296   char *str;
 297   va_start (args, fmt);
 298   ret = vasprintf (&str, fmt, args);
 299   va_end (args);
 300   if (ret < 0 && errno == ENOMEM)
 301     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 302                                                       with xmalloc/xrealloc */
 303   else if (ret < 0)
 304     return NULL;
 305   return str;
 306 #else  /* not HAVE_VASPRINTF */
 307
 308   /* vasprintf is unavailable.  snprintf into a small buffer and
 309      resize it as necessary. */
 310   int size = 32;
 311   char *str = xmalloc (size);
 312
 313   /* #### This code will infloop and eventually abort in xrealloc if
 314      passed a FMT that causes snprintf to consistently return -1.  */
 315
 316   while (1)
 317     {
 318       int n;
 319       va_list args;
 320
 321       va_start (args, fmt);
 322       n = vsnprintf (str, size, fmt, args);
 323       va_end (args);
 324
 325       /* If the printing worked, return the string. */
 326       if (n > -1 && n < size)
 327         return str;
 328
 329       /* Else try again with a larger buffer. */
 330       if (n > -1)               /* C99 */
 331         size = n + 1;           /* precisely what is needed */
 332       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 333         {                               /* maybe we have some wrong
 334                                            format string? */
 335           logprintf (LOG_ALWAYS,
 336                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 337                        "aborting.\n"),
 338                      exec_name, size);  /* printout a log message */
 339           abort ();                     /* and abort... */
 340         }
 341       else
 342         {
 343           /* else, we continue to grow our
 344            * buffer: Twice the old size. */
 345           size <<= 1;
 346         }
 347       str = xrealloc (str, size);
 348     }
 349 #endif /* not HAVE_VASPRINTF */
 350 }
 351
 352 /* Concatenate the NULL-terminated list of string arguments into
 353    freshly allocated space.  */
 354
 355 char *
 356 concat_strings (const char *str0, ...)
 357 {
 358   va_list args;
 359   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 360   char *ret, *p;
 361
 362   const char *next_str;
 363   int total_length = 0;
 364   size_t argcount;
 365
 366   /* Calculate the length of and allocate the resulting string. */
 367
 368   argcount = 0;
 369   va_start (args, str0);
 370   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 371     {
 372       int len = strlen (next_str);
 373       if (argcount < countof (saved_lengths))
 374         saved_lengths[argcount++] = len;
 375       total_length += len;
 376     }
 377   va_end (args);
 378   p = ret = xmalloc (total_length + 1);
 379
 380   /* Copy the strings into the allocated space. */
 381
 382   argcount = 0;
 383   va_start (args, str0);
 384   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 385     {
 386       int len;
 387       if (argcount < countof (saved_lengths))
 388         len = saved_lengths[argcount++];
 389       else
 390         len = strlen (next_str);
 391       memcpy (p, next_str, len);
 392       p += len;
 393     }
 394   va_end (args);
 395   *p = '\0';
 396
 397   return ret;
 398 }
 399 \f
 400 /* Format the provided time according to the specified format.  The
 401    format is a string with format elements supported by strftime.  */
 402
 403 static char *
 404 fmttime (time_t t, const char *fmt)
 405 {
 406   static char output[32];
 407   struct tm *tm = localtime(&t);
 408   if (!tm)
 409     abort ();
 410   if (!strftime(output, sizeof(output), fmt, tm))
 411     abort ();
 412   return output;
 413 }
 414
 415 /* Return pointer to a static char[] buffer in which zero-terminated
 416    string-representation of TM (in form hh:mm:ss) is printed.
 417
 418    If TM is NULL, the current time will be used.  */
 419
 420 char *
 421 time_str (time_t t)
 422 {
 423   return fmttime(t, "%H:%M:%S");
 424 }
 425
 426 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 427
 428 char *
 429 datetime_str (time_t t)
 430 {
 431   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 432 }
 433 \f
 434 /* The Windows versions of the following two functions are defined in
 435    mswindows.c. On MSDOS this function should never be called. */
 436
 437 #ifdef __VMS
 438
 439 void
 440 fork_to_background (void)
 441 {
 442   return;
 443 }
 444
 445 #else /* def __VMS */
 446
 447 #if !defined(WINDOWS) && !defined(MSDOS)
 448 void
 449 fork_to_background (void)
 450 {
 451   pid_t pid;
 452   /* Whether we arrange our own version of opt.lfilename here.  */
 453   bool logfile_changed = false;
 454
 455   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 456     {
 457       /* We must create the file immediately to avoid either a race
 458          condition (which arises from using unique_name and failing to
 459          use fopen_excl) or lying to the user about the log file name
 460          (which arises from using unique_name, printing the name, and
 461          using fopen_excl later on.)  */
 462       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 463       if (new_log_fp)
 464         {
 465           logfile_changed = true;
 466           fclose (new_log_fp);
 467         }
 468     }
 469   pid = fork ();
 470   if (pid < 0)
 471     {
 472       /* parent, error */
 473       perror ("fork");
 474       exit (1);
 475     }
 476   else if (pid != 0)
 477     {
 478       /* parent, no error */
 479       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 480       if (logfile_changed)
 481         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 482       exit (0);                 /* #### should we use _exit()? */
 483     }
 484
 485   /* child: give up the privileges and keep running. */
 486   setsid ();
 487   freopen ("/dev/null", "r", stdin);
 488   freopen ("/dev/null", "w", stdout);
 489   freopen ("/dev/null", "w", stderr);
 490 }
 491 #endif /* !WINDOWS && !MSDOS */
 492
 493 #endif /* def __VMS [else] */
 494
 495 \f
 496 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 497    specified with TM.  The atime ("access time") is set to the current
 498    time.  */
 499
 500 void
 501 touch (const char *file, time_t tm)
 502 {
 503 #if HAVE_UTIME
 504 # ifdef HAVE_STRUCT_UTIMBUF
 505   struct utimbuf times;
 506 # else
 507   struct {
 508     time_t actime;
 509     time_t modtime;
 510   } times;
 511 # endif
 512   times.modtime = tm;
 513   times.actime = time (NULL);
 514   if (utime (file, &times) == -1)
 515     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 516 #else
 517   struct timespec timespecs[2];
 518   int fd;
 519
 520   fd = open (file, O_WRONLY);
 521   if (fd < 0)
 522     {
 523       logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
 524       return;
 525     }
 526
 527   timespecs[0].tv_sec = time (NULL);
 528   timespecs[0].tv_nsec = 0L;
 529   timespecs[1].tv_sec = tm;
 530   timespecs[1].tv_nsec = 0L;
 531
 532   if (futimens (fd, timespecs) == -1)
 533     logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
 534
 535   close (fd);
 536 #endif
 537 }
 538
 539 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 540    nothing under MS-Windows.  */
 541 int
 542 remove_link (const char *file)
 543 {
 544   int err = 0;
 545   struct_stat st;
 546
 547   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 548     {
 549       DEBUGP (("Unlinking %s (symlink).\n", file));
 550       err = unlink (file);
 551       if (err != 0)
 552         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 553                    quote (file), strerror (errno));
 554     }
 555   return err;
 556 }
 557
 558 /* Does FILENAME exist?  This is quite a lousy implementation, since
 559    it supplies no error codes -- only a yes-or-no answer.  Thus it
 560    will return that a file does not exist if, e.g., the directory is
 561    unreadable.  I don't mind it too much currently, though.  The
 562    proper way should, of course, be to have a third, error state,
 563    other than true/false, but that would introduce uncalled-for
 564    additional complexity to the callers.  */
 565 bool
 566 file_exists_p (const char *filename)
 567 {
 568 #ifdef HAVE_ACCESS
 569   return access (filename, F_OK) >= 0;
 570 #else
 571   struct_stat buf;
 572   return stat (filename, &buf) >= 0;
 573 #endif
 574 }
 575
 576 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 577    Returns 0 on error.  */
 578 bool
 579 file_non_directory_p (const char *path)
 580 {
 581   struct_stat buf;
 582   /* Use lstat() rather than stat() so that symbolic links pointing to
 583      directories can be identified correctly.  */
 584   if (lstat (path, &buf) != 0)
 585     return false;
 586   return S_ISDIR (buf.st_mode) ? false : true;
 587 }
 588
 589 /* Return the size of file named by FILENAME, or -1 if it cannot be
 590    opened or seeked into. */
 591 wgint
 592 file_size (const char *filename)
 593 {
 594 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 595   wgint size;
 596   /* We use fseek rather than stat to determine the file size because
 597      that way we can also verify that the file is readable without
 598      explicitly checking for permissions.  Inspired by the POST patch
 599      by Arnaud Wylie.  */
 600   FILE *fp = fopen (filename, "rb");
 601   if (!fp)
 602     return -1;
 603   fseeko (fp, 0, SEEK_END);
 604   size = ftello (fp);
 605   fclose (fp);
 606   return size;
 607 #else
 608   struct_stat st;
 609   if (stat (filename, &st) < 0)
 610     return -1;
 611   return st.st_size;
 612 #endif
 613 }
 614
 615 /* 2005-02-19 SMS.
 616    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 617    original name.  With the VMS file systems' versioning, everything
 618    should be fine, and appending ".NN" just causes trouble.
 619 */
 620
 621 #ifdef UNIQ_SEP
 622
 623 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 624    doesn't exist is found.  Return a freshly allocated copy of the
 625    unused file name.  */
 626
 627 static char *
 628 unique_name_1 (const char *prefix)
 629 {
 630   int count = 1;
 631   int plen = strlen (prefix);
 632   char *template = (char *)alloca (plen + 1 + 24);
 633   char *template_tail = template + plen;
 634
 635   memcpy (template, prefix, plen);
 636   *template_tail++ = UNIQ_SEP;
 637
 638   do
 639     number_to_string (template_tail, count++);
 640   while (file_exists_p (template));
 641
 642   return xstrdup (template);
 643 }
 644
 645 /* Return a unique file name, based on FILE.
 646
 647    More precisely, if FILE doesn't exist, it is returned unmodified.
 648    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 649    file name that doesn't exist is returned.
 650
 651    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 652
 653    The resulting file is not created, only verified that it didn't
 654    exist at the point in time when the function was called.
 655    Therefore, where security matters, don't rely that the file created
 656    by this function exists until you open it with O_EXCL or
 657    equivalent.
 658
 659    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 660    string.  Otherwise, it may return FILE if the file doesn't exist
 661    (and therefore doesn't need changing).  */
 662
 663 char *
 664 unique_name (const char *file, bool allow_passthrough)
 665 {
 666   /* If the FILE itself doesn't exist, return it without
 667      modification. */
 668   if (!file_exists_p (file))
 669     return allow_passthrough ? (char *)file : xstrdup (file);
 670
 671   /* Otherwise, find a numeric suffix that results in unused file name
 672      and return it.  */
 673   return unique_name_1 (file);
 674 }
 675
 676 #else /* def UNIQ_SEP */
 677
 678 /* Dummy unique_name() for VMS.  Return the original name as easily as
 679    possible.
 680 */
 681 char *
 682 unique_name (const char *file, bool allow_passthrough)
 683 {
 684   /* Return the FILE itself, without modification, irregardful. */
 685   return allow_passthrough ? (char *)file : xstrdup (file);
 686 }
 687
 688 #endif /* def UNIQ_SEP [else] */
 689
 690 /* Create a file based on NAME, except without overwriting an existing
 691    file with that name.  Providing O_EXCL is correctly implemented,
 692    this function does not have the race condition associated with
 693    opening the file returned by unique_name.  */
 694
 695 FILE *
 696 unique_create (const char *name, bool binary, char **opened_name)
 697 {
 698   /* unique file name, based on NAME */
 699   char *uname = unique_name (name, false);
 700   FILE *fp;
 701   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 702     {
 703       xfree (uname);
 704       uname = unique_name (name, false);
 705     }
 706   if (opened_name && fp != NULL)
 707     {
 708       if (fp)
 709         *opened_name = uname;
 710       else
 711         {
 712           *opened_name = NULL;
 713           xfree (uname);
 714         }
 715     }
 716   else
 717     xfree (uname);
 718   return fp;
 719 }
 720
 721 /* Open the file for writing, with the addition that the file is
 722    opened "exclusively".  This means that, if the file already exists,
 723    this function will *fail* and errno will be set to EEXIST.  If
 724    BINARY is set, the file will be opened in binary mode, equivalent
 725    to fopen's "wb".
 726
 727    If opening the file fails for any reason, including the file having
 728    previously existed, this function returns NULL and sets errno
 729    appropriately.  */
 730
 731 FILE *
 732 fopen_excl (const char *fname, int binary)
 733 {
 734   int fd;
 735 #ifdef O_EXCL
 736
 737 /* 2005-04-14 SMS.
 738    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 739    It also has file versions which obviate all the O_EXCL effort.
 740    O_TRUNC (something of a misnomer) requests a new version.
 741 */
 742 # ifdef __VMS
 743 /* Common open() optional arguments:
 744    sequential access only, access callback function.
 745 */
 746 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 747
 748   int open_id;
 749   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 750
 751   if (binary > 1)
 752     {
 753       open_id = 11;
 754       fd = open( fname,                 /* File name. */
 755        flags,                           /* Flags. */
 756        0777,                            /* Mode for default protection. */
 757        "ctx=bin,stm",                   /* Binary, stream access. */
 758        "rfm=stmlf",                     /* Stream_LF. */
 759        OPEN_OPT_ARGS);                  /* Access callback. */
 760     }
 761   else if (binary)
 762     {
 763       open_id = 12;
 764       fd = open( fname,                 /* File name. */
 765        flags,                           /* Flags. */
 766        0777,                            /* Mode for default protection. */
 767        "ctx=bin,stm",                   /* Binary, stream access. */
 768        "rfm=fix",                       /* Fixed-length, */
 769        "mrs=512",                       /* 512-byte records. */
 770        OPEN_OPT_ARGS);                  /* Access callback. */
 771     }
 772   else
 773     {
 774       open_id = 13;
 775       fd = open( fname,                 /* File name. */
 776        flags,                           /* Flags. */
 777        0777,                            /* Mode for default protection. */
 778        "rfm=stmlf",                     /* Stream_LF. */
 779        OPEN_OPT_ARGS);                  /* Access callback. */
 780     }
 781 # else /* def __VMS */
 782   int flags = O_WRONLY | O_CREAT | O_EXCL;
 783 # ifdef O_BINARY
 784   if (binary)
 785     flags |= O_BINARY;
 786 # endif
 787   fd = open (fname, flags, 0666);
 788 # endif /* def __VMS [else] */
 789
 790   if (fd < 0)
 791     return NULL;
 792   return fdopen (fd, binary ? "wb" : "w");
 793 #else  /* not O_EXCL */
 794   /* Manually check whether the file exists.  This is prone to race
 795      conditions, but systems without O_EXCL haven't deserved
 796      better.  */
 797   if (file_exists_p (fname))
 798     {
 799       errno = EEXIST;
 800       return NULL;
 801     }
 802   return fopen (fname, binary ? "wb" : "w");
 803 #endif /* not O_EXCL */
 804 }
 805 \f
 806 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 807    are missing, create them first.  In case any mkdir() call fails,
 808    return its error status.  Returns 0 on successful completion.
 809
 810    The behaviour of this function should be identical to the behaviour
 811    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 812 int
 813 make_directory (const char *directory)
 814 {
 815   int i, ret, quit = 0;
 816   char *dir;
 817
 818   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 819      function is unsafe if called with a read-only char *argument.  */
 820   STRDUP_ALLOCA (dir, directory);
 821
 822   /* If the first character of dir is '/', skip it (and thus enable
 823      creation of absolute-pathname directories.  */
 824   for (i = (*dir == '/'); 1; ++i)
 825     {
 826       for (; dir[i] && dir[i] != '/'; i++)
 827         ;
 828       if (!dir[i])
 829         quit = 1;
 830       dir[i] = '\0';
 831       /* Check whether the directory already exists.  Allow creation of
 832          of intermediate directories to fail, as the initial path components
 833          are not necessarily directories!  */
 834       if (!file_exists_p (dir))
 835         ret = mkdir (dir, 0777);
 836       else
 837         ret = 0;
 838       if (quit)
 839         break;
 840       else
 841         dir[i] = '/';
 842     }
 843   return ret;
 844 }
 845
 846 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 847    should be a file name.
 848
 849    file_merge("/foo/bar", "baz")  => "/foo/baz"
 850    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 851    file_merge("foo", "bar")       => "bar"
 852
 853    In other words, it's a simpler and gentler version of uri_merge.  */
 854
 855 char *
 856 file_merge (const char *base, const char *file)
 857 {
 858   char *result;
 859   const char *cut = (const char *)strrchr (base, '/');
 860
 861   if (!cut)
 862     return xstrdup (file);
 863
 864   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 865   memcpy (result, base, cut - base);
 866   result[cut - base] = '/';
 867   strcpy (result + (cut - base) + 1, file);
 868
 869   return result;
 870 }
 871 \f
 872 /* Like fnmatch, but performs a case-insensitive match.  */
 873
 874 int
 875 fnmatch_nocase (const char *pattern, const char *string, int flags)
 876 {
 877 #ifdef FNM_CASEFOLD
 878   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 879      also present on *BSD platforms, and possibly elsewhere.  */
 880   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 881 #else
 882   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 883   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 884   char *strcopy = (char *) alloca (strlen (string) + 1);
 885   char *p;
 886   for (p = patcopy; *pattern; pattern++, p++)
 887     *p = c_tolower (*pattern);
 888   *p = '\0';
 889   for (p = strcopy; *string; string++, p++)
 890     *p = c_tolower (*string);
 891   *p = '\0';
 892   return fnmatch (patcopy, strcopy, flags);
 893 #endif
 894 }
 895
 896 static bool in_acclist (const char *const *, const char *, bool);
 897
 898 /* Determine whether a file is acceptable to be followed, according to
 899    lists of patterns to accept/reject.  */
 900 bool
 901 acceptable (const char *s)
 902 {
 903   int l = strlen (s);
 904
 905   if (opt.output_document && strcmp (s, opt.output_document) == 0)
 906     return true;
 907
 908   while (l && s[l] != '/')
 909     --l;
 910   if (s[l] == '/')
 911     s += (l + 1);
 912   if (opt.accepts)
 913     {
 914       if (opt.rejects)
 915         return (in_acclist ((const char *const *)opt.accepts, s, true)
 916                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 917       else
 918         return in_acclist ((const char *const *)opt.accepts, s, true);
 919     }
 920   else if (opt.rejects)
 921     return !in_acclist ((const char *const *)opt.rejects, s, true);
 922   return true;
 923 }
 924
 925 /* Determine whether an URL is acceptable to be followed, according to
 926    regex patterns to accept/reject.  */
 927 bool
 928 accept_url (const char *s)
 929 {
 930   if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
 931     return false;
 932   if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
 933     return false;
 934
 935   return true;
 936 }
 937
 938 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 939    will return true if and only if D2 begins with `/something/' or is exactly
 940    '/something'.  */
 941 bool
 942 subdir_p (const char *d1, const char *d2)
 943 {
 944   if (*d1 == '\0')
 945     return true;
 946   if (!opt.ignore_case)
 947     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 948       ;
 949   else
 950     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 951       ;
 952
 953   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 954 }
 955
 956 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 957    first element that matches DIR, through wildcards or front comparison (as
 958    appropriate).  */
 959 static bool
 960 dir_matches_p (char **dirlist, const char *dir)
 961 {
 962   char **x;
 963   int (*matcher) (const char *, const char *, int)
 964     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 965
 966   for (x = dirlist; *x; x++)
 967     {
 968       /* Remove leading '/' */
 969       char *p = *x + (**x == '/');
 970       if (has_wildcards_p (p))
 971         {
 972           if (matcher (p, dir, FNM_PATHNAME) == 0)
 973             break;
 974         }
 975       else
 976         {
 977           if (subdir_p (p, dir))
 978             break;
 979         }
 980     }
 981
 982   return *x ? true : false;
 983 }
 984
 985 /* Returns whether DIRECTORY is acceptable for download, wrt the
 986    include/exclude lists.
 987
 988    The leading `/' is ignored in paths; relative and absolute paths
 989    may be freely intermixed.  */
 990
 991 bool
 992 accdir (const char *directory)
 993 {
 994   /* Remove starting '/'.  */
 995   if (*directory == '/')
 996     ++directory;
 997   if (opt.includes)
 998     {
 999       if (!dir_matches_p (opt.includes, directory))
1000         return false;
1001     }
1002   if (opt.excludes)
1003     {
1004       if (dir_matches_p (opt.excludes, directory))
1005         return false;
1006     }
1007   return true;
1008 }
1009
/* Return true if STRING ends with TAIL.  For instance:

   match_tail ("abc", "bc", false)  -> 1
   match_tail ("abc", "ab", false)  -> 0
   match_tail ("abc", "abc", false) -> 1

   An empty TAIL matches any STRING.  If FOLD_CASE is true, characters
   are compared after being passed through c_tolower.  */

bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);
  const char *candidate;
  size_t k;

  /* A tail longer than the string cannot possibly match.  */
  if (tail_len > string_len)
    return false;

  /* The only place TAIL can match is the last TAIL_LEN characters.  */
  candidate = string + (string_len - tail_len);

  if (!fold_case)
    return memcmp (candidate, tail, tail_len) == 0;

  for (k = 0; k < tail_len; k++)
    if (c_tolower (candidate[k]) != c_tolower (tail[k]))
      return false;
  return true;
}
1045
1046 /* Checks whether string S matches each element of ACCEPTS.  A list
1047    element are matched either with fnmatch() or match_tail(),
1048    according to whether the element contains wildcards or not.
1049
1050    If the BACKWARD is false, don't do backward comparison -- just compare
1051    them normally.  */
1052 static bool
1053 in_acclist (const char *const *accepts, const char *s, bool backward)
1054 {
1055   for (; *accepts; accepts++)
1056     {
1057       if (has_wildcards_p (*accepts))
1058         {
1059           int res = opt.ignore_case
1060             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1061           /* fnmatch returns 0 if the pattern *does* match the string.  */
1062           if (res == 0)
1063             return true;
1064         }
1065       else
1066         {
1067           if (backward)
1068             {
1069               if (match_tail (s, *accepts, opt.ignore_case))
1070                 return true;
1071             }
1072           else
1073             {
1074               int cmp = opt.ignore_case
1075                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1076               if (cmp == 0)
1077                 return true;
1078             }
1079         }
1080     }
1081   return false;
1082 }
1083
/* Return a pointer to the suffix (file extension) of STR, i.e. to the
   text after the last `.', provided that no `/' follows that dot.
   NULL is returned when there is no such dot.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL
   The result points into STR itself; nothing is allocated.  */
char *
suffix (const char *str)
{
  const char *dot = strrchr (str, '.');

  /* No dot at all, or the last dot belongs to a directory name.  */
  if (dot == NULL || strchr (dot, '/') != NULL)
    return NULL;

  return (char *) dot + 1;
}
1102
/* Return true if S contains at least one globbing wildcard character
   (`*', `?', `[' or `]').  */

bool
has_wildcards_p (const char *s)
{
  return strpbrk (s, "*?[]") != NULL;
}
1114
/* Return true if FNAME ends with a typical HTML suffix.  The suffix,
   as found by suffix(), is compared case-insensitively against:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return (strcasecmp (ext, "html") == 0
          || strcasecmp (ext, "htm") == 0
          /* One arbitrary character followed by "html".  */
          || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}
1140
/* Read one line from FP into freshly allocated storage and return it.
   The storage is obtained with xmalloc()/xrealloc() and should be
   released by the caller when it is no longer needed.

   The length of the line is limited only by available memory.  The
   trailing newline, if one was read, is kept, and the line is
   zero-terminated.

   NULL is returned when nothing could be read (such as at
   end-of-file) and also when ferror() reports an error on FP; use
   ferror() to tell the two cases apart.  */

char *
read_whole_line (FILE *fp)
{
  int used = 0;                 /* characters stored so far */
  int capacity = 82;            /* current size of BUF */
  char *buf = xmalloc (capacity);

  for (;;)
    {
      if (fgets (buf + used, capacity - used, fp) == NULL)
        break;

      used += strlen (buf + used);
      if (used == 0)
        /* Can happen, e.g., with a binary file in which a line starts
           with \0.  Just try again.  */
        continue;

      if (buf[used - 1] == '\n')
        break;

      /* fgets() either reads the whole line or fills the space it was
         given, so at this point the buffer is full and can be doubled
         unconditionally.  */
      capacity <<= 1;
      buf = xrealloc (buf, capacity);
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buf);
      return NULL;
    }

  /* Give back what the exponential growth over-allocated.  The `+ 1'
     accounts for the terminating \0, which fgets() has already
     written and which is not counted in USED.  */
  if (used + 1 < capacity)
    buf = xrealloc (buf, used + 1);

  return buf;
}
1190 \f
1191 /* Read FILE into memory.  A pointer to `struct file_memory' are
1192    returned; use struct element `content' to access file contents, and
1193    the element `length' to know the file length.  `content' is *not*
1194    zero-terminated, and you should *not* read or write beyond the [0,
1195    length) range of characters.
1196
1197    After you are done with the file contents, call wget_read_file_free to
1198    release the memory.
1199
1200    Depending on the operating system and the type of file that is
1201    being read, wget_read_file() either mmap's the file into memory, or
1202    reads the file into the core using read().
1203
1204    If file is named "-", fileno(stdin) is used for reading instead.
1205    If you want to read from a real file named "-", use "./-" instead.  */
1206
1207 struct file_memory *
1208 wget_read_file (const char *file)
1209 {
1210   int fd;
1211   struct file_memory *fm;
1212   long size;
1213   bool inhibit_close = false;
1214
1215   /* Some magic in the finest tradition of Perl and its kin: if FILE
1216      is "-", just use stdin.  */
1217   if (HYPHENP (file))
1218     {
1219       fd = fileno (stdin);
1220       inhibit_close = true;
1221       /* Note that we don't inhibit mmap() in this case.  If stdin is
1222          redirected from a regular file, mmap() will still work.  */
1223     }
1224   else
1225     fd = open (file, O_RDONLY);
1226   if (fd < 0)
1227     return NULL;
1228   fm = xnew (struct file_memory);
1229
1230 #ifdef HAVE_MMAP
1231   {
1232     struct_fstat buf;
1233     if (fstat (fd, &buf) < 0)
1234       goto mmap_lose;
1235     fm->length = buf.st_size;
1236     /* NOTE: As far as I know, the callers of this function never
1237        modify the file text.  Relying on this would enable us to
1238        specify PROT_READ and MAP_SHARED for a marginal gain in
1239        efficiency, but at some cost to generality.  */
1240     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1241                         MAP_PRIVATE, fd, 0);
1242     if (fm->content == (char *)MAP_FAILED)
1243       goto mmap_lose;
1244     if (!inhibit_close)
1245       close (fd);
1246
1247     fm->mmap_p = 1;
1248     return fm;
1249   }
1250
1251  mmap_lose:
1252   /* The most common reason why mmap() fails is that FD does not point
1253      to a plain file.  However, it's also possible that mmap() doesn't
1254      work for a particular type of file.  Therefore, whenever mmap()
1255      fails, we just fall back to the regular method.  */
1256 #endif /* HAVE_MMAP */
1257
1258   fm->length = 0;
1259   size = 512;                   /* number of bytes fm->contents can
1260                                    hold at any given time. */
1261   fm->content = xmalloc (size);
1262   while (1)
1263     {
1264       wgint nread;
1265       if (fm->length > size / 2)
1266         {
1267           /* #### I'm not sure whether the whole exponential-growth
1268              thing makes sense with kernel read.  On Linux at least,
1269              read() refuses to read more than 4K from a file at a
1270              single chunk anyway.  But other Unixes might optimize it
1271              better, and it doesn't *hurt* anything, so I'm leaving
1272              it.  */
1273
1274           /* Normally, we grow SIZE exponentially to make the number
1275              of calls to read() and realloc() logarithmic in relation
1276              to file size.  However, read() can read an amount of data
1277              smaller than requested, and it would be unreasonable to
1278              double SIZE every time *something* was read.  Therefore,
1279              we double SIZE only when the length exceeds half of the
1280              entire allocated size.  */
1281           size <<= 1;
1282           fm->content = xrealloc (fm->content, size);
1283         }
1284       nread = read (fd, fm->content + fm->length, size - fm->length);
1285       if (nread > 0)
1286         /* Successful read. */
1287         fm->length += nread;
1288       else if (nread < 0)
1289         /* Error. */
1290         goto lose;
1291       else
1292         /* EOF */
1293         break;
1294     }
1295   if (!inhibit_close)
1296     close (fd);
1297   if (size > fm->length && fm->length != 0)
1298     /* Due to exponential growth of fm->content, the allocated region
1299        might be much larger than what is actually needed.  */
1300     fm->content = xrealloc (fm->content, fm->length);
1301   fm->mmap_p = 0;
1302   return fm;
1303
1304  lose:
1305   if (!inhibit_close)
1306     close (fd);
1307   xfree (fm->content);
1308   xfree (fm);
1309   return NULL;
1310 }
1311
1312 /* Release the resources held by FM.  Specifically, this calls
1313    munmap() or xfree() on fm->content, depending whether mmap or
1314    malloc/read were used to read in the file.  It also frees the
1315    memory needed to hold the FM structure itself.  */
1316
1317 void
1318 wget_read_file_free (struct file_memory *fm)
1319 {
1320 #ifdef HAVE_MMAP
1321   if (fm->mmap_p)
1322     {
1323       munmap (fm->content, fm->length);
1324     }
1325   else
1326 #endif
1327     {
1328       xfree (fm->content);
1329     }
1330   xfree (fm);
1331 }
1332 \f
/* Free every string in the NULL-terminated vector VEC, and then the
   vector itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;

  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1346
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the result.  V1 is reallocated and V2 itself is freed
   (its strings are now owned by the result), so neither pointer may
   be used after the call.  If V1 is NULL, V2 is returned unchanged;
   if V2 is NULL, V1 is returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 has no elements: nothing to copy, just dispose of it.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The element size is taken from the vector itself (the
     elements are `char *', not `char **').  */
  v1 = xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy V2's elements, including its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1377
/* Append a freshly allocated copy of STR to the NULL-terminated
   vector VEC and return the (possibly moved) vector.  VEC may be
   NULL, in which case a new vector is created.  */

char **
vec_append (char **vec, const char *str)
{
  int old_count = 0;            /* elements already in VEC */

  if (vec != NULL)
    while (vec[old_count] != NULL)
      old_count++;

  /* Make room for the new element and the terminating NULL.  */
  vec = xrealloc (vec, (old_count + 2) * sizeof (char *));
  vec[old_count] = xstrdup (str);
  vec[old_count + 1] = NULL;
  return vec;
}
1401 \f
1402 /* Sometimes it's useful to create "sets" of strings, i.e. special
1403    hash tables where you want to store strings as keys and merely
1404    query for their existence.  Here is a set of utility routines that
1405    makes that transparent.  */
1406
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert when S is not in the set yet; that way there is never
     an old key to free and replace with a new copy.

     The value stored with each key is the string "1".  It is a clear
     arbitrary value that costs no memory, since every key-value pair
     in every `set' hash table shares the same literal.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1422
/* Report whether S is in the string set HT.  This simply forwards to
   hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}
1430
1431 /* Convert the specified string set to array.  ARRAY should be large
1432    enough to hold hash_table_count(ht) char pointers.  */
1433
1434 void string_set_to_array (struct hash_table *ht, char **array)
1435 {
1436   hash_table_iterator iter;
1437   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1438     *array++ = iter.key;
1439 }
1440
1441 /* Free the string set.  This frees both the storage allocated for
1442    keys and the actual hash table.  (hash_table_destroy would only
1443    destroy the hash table.)  */
1444
1445 void
1446 string_set_free (struct hash_table *ht)
1447 {
1448   hash_table_iterator iter;
1449   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1450     xfree (iter.key);
1451   hash_table_destroy (ht);
1452 }
1453
1454 /* Utility function: simply call xfree() on all keys and values of HT.  */
1455
1456 void
1457 free_keys_and_values (struct hash_table *ht)
1458 {
1459   hash_table_iterator iter;
1460   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1461     {
1462       xfree (iter.key);
1463       xfree (iter.value);
1464     }
1465 }
1466 \f
/* Fetch the digit grouping data for thousand separators from
   localeconv() and store the separator string in *SEP and the
   grouping info in *GROUPING.  The data is computed on the first call
   and cached for all later calls.

   In locales that don't set a thousand separator (such as the "C"
   locale), the separator is forced to "," -- or to "." when "," is
   the decimal point -- with groups of three digits.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *cached_sep;
  static const char *cached_grouping;
  static bool initialized;

  if (!initialized)
    {
      /* Get the grouping info from the locale. */
      struct lconv *lc = localeconv ();
      const char *locale_sep = lc->thousands_sep;
      const char *locale_grouping = lc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths cannot be counted in this configuration, so a
         multi-byte separator is discarded here and replaced by the
         single-byte fallback chosen below.  */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif

      if (*locale_sep == '\0')
        {
          /* No usable separator (as in the "C" or "hr_HR" locales).
             Grouping is still wanted for legibility, so use ',',
             unless that is the decimal point, in which case use
             '.'.  Digits are then grouped by three.  */
          locale_sep = (*lc->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      cached_sep = locale_sep;
      cached_grouping = locale_grouping;
      initialized = true;
    }

  *sep = cached_sep;
  *grouping = cached_grouping;
}
1512
1513 /* Return a printed representation of N with thousand separators.
1514    This should respect locale settings, with the exception of the "C"
1515    locale which mandates no separator, but we use one anyway.
1516
1517    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1518    the separators because it's too non-portable, and it's hard to test
1519    for this feature at configure time.  Besides, it wouldn't display
1520    separators in the "C" locale, still used by many Unix users.  */
1521
1522 const char *
1523 with_thousand_seps (wgint n)
1524 {
1525   static char outbuf[48];
1526   char *p = outbuf + sizeof outbuf;
1527
1528   /* Info received from locale */
1529   const char *grouping, *sep;
1530   int seplen;
1531
1532   /* State information */
1533   int i = 0, groupsize;
1534   const char *atgroup;
1535
1536   bool negative = n < 0;
1537
1538   /* Initialize grouping data. */
1539   get_grouping_data (&sep, &grouping);
1540   seplen = strlen (sep);
1541   atgroup = grouping;
1542   groupsize = *atgroup++;
1543
1544   /* This would overflow on WGINT_MIN, but printing negative numbers
1545      is not an important goal of this fuinction.  */
1546   if (negative)
1547     n = -n;
1548
1549   /* Write the number into the buffer, backwards, inserting the
1550      separators as necessary.  */
1551   *--p = '\0';
1552   while (1)
1553     {
1554       *--p = n % 10 + '0';
1555       n /= 10;
1556       if (n == 0)
1557         break;
1558       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1559       if (++i == groupsize)
1560         {
1561           if (seplen == 1)
1562             *--p = *sep;
1563           else
1564             memcpy (p -= seplen, sep, seplen);
1565           i = 0;
1566           if (*atgroup)
1567             groupsize = *atgroup++;
1568         }
1569     }
1570   if (negative)
1571     *--p = '-';
1572
1573   return p;
1574 }
1575
1576 /* N, a byte quantity, is converted to a human-readable abberviated
1577    form a la sizes printed by `ls -lh'.  The result is written to a
1578    static buffer, a pointer to which is returned.
1579
1580    Unlike `with_thousand_seps', this approximates to the nearest unit.
1581    Quoting GNU libit: "Most people visually process strings of 3-4
1582    digits effectively, but longer strings of digits are more prone to
1583    misinterpretation.  Hence, converting to an abbreviated form
1584    usually improves readability."
1585
1586    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1587    original computer-related meaning of "powers of 1024".  We don't
1588    use the "*bibyte" names invented in 1998, and seldom used in
1589    practice.  Wikipedia's entry on "binary prefix" discusses this in
1590    some detail.  */
1591
1592 char *
1593 human_readable (HR_NUMTYPE n)
1594 {
1595   /* These suffixes are compatible with those of GNU `ls -lh'. */
1596   static char powers[] =
1597     {
1598       'K',                      /* kilobyte, 2^10 bytes */
1599       'M',                      /* megabyte, 2^20 bytes */
1600       'G',                      /* gigabyte, 2^30 bytes */
1601       'T',                      /* terabyte, 2^40 bytes */
1602       'P',                      /* petabyte, 2^50 bytes */
1603       'E',                      /* exabyte,  2^60 bytes */
1604     };
1605   static char buf[8];
1606   size_t i;
1607
1608   /* If the quantity is smaller than 1K, just print it. */
1609   if (n < 1024)
1610     {
1611       snprintf (buf, sizeof (buf), "%d", (int) n);
1612       return buf;
1613     }
1614
1615   /* Loop over powers, dividing N with 1024 in each iteration.  This
1616      works unchanged for all sizes of wgint, while still avoiding
1617      non-portable `long double' arithmetic.  */
1618   for (i = 0; i < countof (powers); i++)
1619     {
1620       /* At each iteration N is greater than the *subsequent* power.
1621          That way N/1024.0 produces a decimal number in the units of
1622          *this* power.  */
1623       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1624         {
1625           double val = n / 1024.0;
1626           /* Print values smaller than 10 with one decimal digits, and
1627              others without any decimals.  */
1628           snprintf (buf, sizeof (buf), "%.*f%c",
1629                     val < 10 ? 1 : 0, val, powers[i]);
1630           return buf;
1631         }
1632       n /= 1024;
1633     }
1634   return NULL;                  /* unreached */
1635 }
1636
1637 /* Count the digits in the provided number.  Used to allocate space
1638    when printing numbers.  */
1639
1640 int
1641 numdigit (wgint number)
1642 {
1643   int cnt = 1;
1644   if (number < 0)
1645     ++cnt;                      /* accomodate '-' */
1646   while ((number /= 10) != 0)
1647     ++cnt;
1648   return cnt;
1649 }
1650
1651 #define PR(mask) *p++ = n / (mask) + '0'
1652
1653 /* DIGITS_<D> is used to print a D-digit number and should be called
1654    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1655    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1656    Recursively this continues until DIGITS_1 is invoked.  */
1657
1658 #define DIGITS_1(mask) PR (mask)
1659 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1660 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1661 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1662 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1663 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1664 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1665 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1666 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1667 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1668
1669 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1670
1671 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1672 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1673 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1674 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1675 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1676 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1677 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1678 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1679 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1680
1681 /* Shorthand for casting to wgint. */
1682 #define W wgint
1683
1684 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1685    `sprintf(buffer, "%lld", (long long) number)', only typically much
1686    faster and portable to machines without long long.
1687
1688    The speedup may make a difference in programs that frequently
1689    convert numbers to strings.  Some implementations of sprintf,
1690    particularly the one in some versions of GNU libc, have been known
1691    to be quite slow when converting integers to strings.
1692
1693    Return the pointer to the location where the terminating zero was
1694    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1695    function is done.)
1696
1697    BUFFER should be large enough to accept as many bytes as you expect
1698    the number to take up.  On machines with 64-bit wgints the maximum
1699    needed size is 24 bytes.  That includes the digits needed for the
1700    largest 64-bit number, the `-' sign in case it's negative, and the
1701    terminating '\0'.  */
1702
1703 char *
1704 number_to_string (char *buffer, wgint number)
1705 {
1706   char *p = buffer;
1707   wgint n = number;
1708
1709   int last_digit_char = 0;
1710
1711 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1712   /* We are running in a very strange environment.  Leave the correct
1713      printing to sprintf.  */
1714   p += sprintf (buf, "%j", (intmax_t) (n));
1715 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1716
1717   if (n < 0)
1718     {
1719       if (n < -WGINT_MAX)
1720         {
1721           /* n = -n would overflow because -n would evaluate to a
1722              wgint value larger than WGINT_MAX.  Need to make n
1723              smaller and handle the last digit separately.  */
1724           int last_digit = n % 10;
1725           /* The sign of n%10 is implementation-defined. */
1726           if (last_digit < 0)
1727             last_digit_char = '0' - last_digit;
1728           else
1729             last_digit_char = '0' + last_digit;
1730           /* After n is made smaller, -n will not overflow. */
1731           n /= 10;
1732         }
1733
1734       *p++ = '-';
1735       n = -n;
1736     }
1737
1738   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1739      way printing any N is fully open-coded without a loop or jump.
1740      (Also see description of DIGITS_*.)  */
1741
1742   if      (n < 10)                       DIGITS_1 (1);
1743   else if (n < 100)                      DIGITS_2 (10);
1744   else if (n < 1000)                     DIGITS_3 (100);
1745   else if (n < 10000)                    DIGITS_4 (1000);
1746   else if (n < 100000)                   DIGITS_5 (10000);
1747   else if (n < 1000000)                  DIGITS_6 (100000);
1748   else if (n < 10000000)                 DIGITS_7 (1000000);
1749   else if (n < 100000000)                DIGITS_8 (10000000);
1750   else if (n < 1000000000)               DIGITS_9 (100000000);
1751 #if SIZEOF_WGINT == 4
1752   /* wgint is 32 bits wide: no number has more than 10 digits. */
1753   else                                   DIGITS_10 (1000000000);
1754 #else
1755   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1756      Constants are constructed by compile-time multiplication to avoid
1757      dealing with different notations for 64-bit constants
1758      (nL/nLL/nI64, depending on the compiler and architecture).  */
1759   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1760   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1761   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1762   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1763   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1764   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1765   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1766   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1767   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1768   else                                   DIGITS_19 (1000000000*(W)1000000000);
1769 #endif
1770
1771   if (last_digit_char)
1772     *p++ = last_digit_char;
1773
1774   *p = '\0';
1775 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1776
1777   return p;
1778 }
1779
1780 #undef PR
1781 #undef W
1782 #undef SPRINTF_WGINT
1783 #undef DIGITS_1
1784 #undef DIGITS_2
1785 #undef DIGITS_3
1786 #undef DIGITS_4
1787 #undef DIGITS_5
1788 #undef DIGITS_6
1789 #undef DIGITS_7
1790 #undef DIGITS_8
1791 #undef DIGITS_9
1792 #undef DIGITS_10
1793 #undef DIGITS_11
1794 #undef DIGITS_12
1795 #undef DIGITS_13
1796 #undef DIGITS_14
1797 #undef DIGITS_15
1798 #undef DIGITS_16
1799 #undef DIGITS_17
1800 #undef DIGITS_18
1801 #undef DIGITS_19
1802
1803 #define RING_SIZE 3
1804
1805 /* Print NUMBER to a statically allocated string and return a pointer
1806    to the printed representation.
1807
1808    This function is intended to be used in conjunction with printf.
1809    It is hard to portably print wgint values:
1810     a) you cannot use printf("%ld", number) because wgint can be long
1811        long on 32-bit machines with LFS.
1812     b) you cannot use printf("%lld", number) because NUMBER could be
1813        long on 32-bit machines without LFS, or on 64-bit machines,
1814        which do not require LFS.  Also, Windows doesn't support %lld.
1815     c) you cannot use printf("%j", (int_max_t) number) because not all
1816        versions of printf support "%j", the most notable being the one
1817        on Windows.
1818     d) you cannot #define WGINT_FMT to the appropriate format and use
1819        printf(WGINT_FMT, number) because that would break translations
1820        for user-visible messages, such as printf("Downloaded: %d
1821        bytes\n", number).
1822
1823    What you should use instead is printf("%s", number_to_static_string
1824    (number)).
1825
1826    CAVEAT: since the function returns pointers to static data, you
1827    must be careful to copy its result before calling it again.
1828    However, to make it more useful with printf, the function maintains
1829    an internal ring of static buffers to return.  That way things like
1830    printf("%s %s", number_to_static_string (num1),
1831    number_to_static_string (num2)) work as expected.  Three buffers
1832    are currently used, which means that "%s %s %s" will work, but "%s
1833    %s %s %s" won't.  If you need to print more than three wgints,
1834    bump the RING_SIZE (or rethink your message.)  */
1835
1836 char *
1837 number_to_static_string (wgint number)
1838 {
1839   static char ring[RING_SIZE][24];
1840   static int ringpos;
1841   char *buf = ring[ringpos];
1842   number_to_string (buf, number);
1843   ringpos = (ringpos + 1) % RING_SIZE;
1844   return buf;
1845 }
1846
1847 /* Converts the byte to bits format if --report-bps option is enabled
1848  */
1849 wgint
1850 convert_to_bits (wgint num)
1851 {
1852   if (opt.report_bps)
1853     return num * 8;
1854   return num;
1855 }
1856
1857 \f
/* Report how many columns the terminal attached to stderr has.
   Returns 0 whenever the width cannot be determined.  */

int
determine_screen_width (void)
{
#ifdef TIOCGWINSZ
  /* Ask the tty driver for the window size; no POSIX tcgetattr()-based
     way of obtaining it is used here.  */
  struct winsize wsz;

  /* NOTE(review): a non-NULL opt.lfilename short-circuits to 0 --
     presumably because output then goes to a log file rather than a
     terminal; confirm against the option's definition.  */
  if (opt.lfilename)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) >= 0)
    return wsz.ws_col;

  return 0;                     /* most likely ENOTTY */
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO csbi;

  return GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi)
         ? csbi.dwSize.X : 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1887 \f
/* Non-zero once the random generator in use (rand or the [dl]rand48
   family) has been seeded, so that seeding happens on first use
   only.  */
static int rnd_seeded;

/* Return a pseudo-random integer between 0 and MAX-1, inclusive.

   lrand48 is used when HAVE_DRAND48 is defined, rand otherwise.  The
   generator is seeded from the current time and the process id the
   first time any of the random functions is called.  With rand, the
   result is derived from a single rand() value, so no more than
   RAND_MAX+1 distinct results are possible regardless of MAX.

   DO NOT use this for cryptography; it is meant for situations where
   the quality of the random numbers doesn't really matter.  */

int
random_number (int max)
{
#ifndef HAVE_DRAND48
  double scaled;

  if (!rnd_seeded)
    {
      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
      rnd_seeded = 1;
    }

  /* Scale the rand() result into [0, max) instead of taking it modulo
     max: this relies on the high-order bits, which are the better
     ones when rand() is a simple congruential generator.  */
  scaled = (double) max * rand () / (RAND_MAX + 1.0);
  return (int) scaled;
#else  /* HAVE_DRAND48 */
  if (!rnd_seeded)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#endif /* HAVE_DRAND48 */
}
1934
/* Return a pseudo-random floating point number in the [0, 1) range.
   With HAVE_DRAND48 this is simply drand48 (seeded on first use);
   without it, the value is pieced together from several
   random_number calls, which is a rather crude substitute.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (!rnd_seeded)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Treat four random values below 10000 as successive base-10000
     "digits" after the decimal point: the first is divided by 10000,
     the second by 10000^2, and so on.  */
  double result = 0.0;
  double scale = 1.0;
  int digit;

  for (digit = 0; digit < 4; digit++)
    {
      scale *= 10000.0;
      result += random_number (10000) / scale;
    }
  return result;
#endif /* not HAVE_DRAND48 */
}
1956 \f
1957 /* Implementation of run_with_timeout, a generic timeout-forcing
1958    routine for systems with Unix-like signal handling.  */
1959
1960 #ifdef USE_SIGNAL_TIMEOUT
1961 # ifdef HAVE_SIGSETJMP
1962 #  define SETJMP(env) sigsetjmp (env, 1)
1963
1964 static sigjmp_buf run_with_timeout_env;
1965
1966 static void
1967 abort_run_with_timeout (int sig)
1968 {
1969   assert (sig == SIGALRM);
1970   siglongjmp (run_with_timeout_env, -1);
1971 }
1972 # else /* not HAVE_SIGSETJMP */
1973 #  define SETJMP(env) setjmp (env)
1974
1975 static jmp_buf run_with_timeout_env;
1976
1977 static void
1978 abort_run_with_timeout (int sig)
1979 {
1980   assert (sig == SIGALRM);
1981   /* We don't have siglongjmp to preserve the set of blocked signals;
1982      if we longjumped out of the handler at this point, SIGALRM would
1983      remain blocked.  We must unblock it manually. */
1984   sigset_t set;
1985   sigemptyset (&set);
1986   sigaddset (&set, SIGALRM);
1987   sigprocmask (SIG_BLOCK, &set, NULL);
1988
1989   /* Now it's safe to longjump. */
1990   longjmp (run_with_timeout_env, -1);
1991 }
1992 # endif /* not HAVE_SIGSETJMP */
1993
/* Schedule a SIGALRM to be delivered TIMEOUT seconds from now, using
   setitimer when ITIMER_REAL is defined and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would be
   truncated to a zero interval is bumped to the smallest interval the
   interface in use can express (1us for setitimer, 1s for alarm), so
   that SIGALRM is delivered in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* The modern itimer interface: one-shot timer with microsecond
     resolution.  */
  struct itimerval itv;
  long whole_secs = (long) timeout;

  xzero (itv);
  itv.it_value.tv_sec = whole_secs;
  itv.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* An all-zero value would mean "wait forever" instead of arming the
     timer, so wait for at least the minimum interval.  */
  if (!itv.it_value.tv_sec && !itv.it_value.tv_usec)
    itv.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* The old alarm() interface: whole seconds only.  alarm(0) means
     "never deliver the alarm", which is not what someone asking for,
     say, a 0.5s timeout expects, so sub-second values become 1.  */
  int secs = (int) timeout;

  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
2028
/* Disarm the timer armed by alarm_set, using the same interface
   (setitimer when ITIMER_REAL is defined, alarm otherwise).  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* An all-zero itimerval switches the timer off.  */
  struct itimerval off;

  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2042
2043 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2044    seconds.  Returns true if the function was interrupted with a
2045    timeout, false otherwise.
2046
2047    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2048    using setitimer() or alarm().  The timeout is enforced by
2049    longjumping out of the SIGALRM handler.  This has several
2050    advantages compared to the traditional approach of relying on
2051    signals causing system calls to exit with EINTR:
2052
2053      * The callback function is *forcibly* interrupted after the
2054        timeout expires, (almost) regardless of what it was doing and
2055        whether it was in a syscall.  For example, a calculation that
2056        takes a long time is interrupted as reliably as an IO
2057        operation.
2058
2059      * It works with both SYSV and BSD signals because it doesn't
2060        depend on the default setting of SA_RESTART.
2061
2062      * It doesn't require special handler setup beyond a simple call
2063        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2064        optional.)
2065
2066    The only downside is that, if FUN allocates internal resources that
2067    are normally freed prior to exit from the functions, they will be
2068    lost in case of timeout.  */
2069
2070 bool
2071 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2072 {
2073   int saved_errno;
2074
2075   if (timeout == 0)
2076     {
2077       fun (arg);
2078       return false;
2079     }
2080
2081   signal (SIGALRM, abort_run_with_timeout);
2082   if (SETJMP (run_with_timeout_env) != 0)
2083     {
2084       /* Longjumped out of FUN with a timeout. */
2085       signal (SIGALRM, SIG_DFL);
2086       return true;
2087     }
2088   alarm_set (timeout);
2089   fun (arg);
2090
2091   /* Preserve errno in case alarm() or signal() modifies it. */
2092   saved_errno = errno;
2093   alarm_cancel ();
2094   signal (SIGALRM, SIG_DFL);
2095   errno = saved_errno;
2096
2097   return false;
2098 }
2099
2100 #else  /* not USE_SIGNAL_TIMEOUT */
2101
#ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based timeouts:
   FUN(ARG) is called with no time limit and the result is always
   false ("did not time out").  It is left out under Windows, which
   has its own, thread-based run_with_timeout.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  /* TIMEOUT cannot be enforced here and is deliberately ignored.  */
  (void) timeout;

  fun (arg);
  return false;
}
#endif /* not WINDOWS */
2114 #endif /* not USE_SIGNAL_TIMEOUT */
2115 \f
2116 #ifndef WINDOWS
2117
/* Suspend execution for SECONDS seconds (fractions allowed).  Only
   the nanosleep() variant resumes sleeping after being interrupted by
   a signal; the other variants may return early.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is preferred: it is accurate and, more importantly, it
     reports the unslept time, which lets us reliably go back to sleep
     after a signal such as SIGWINCH.  (There was an actual Debian bug
     report about --limit-rate malfunctioning while the terminal was
     being resized.)  */
  struct timespec request, leftover;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);

  for (;;)
    {
      if (nanosleep (&request, &leftover) >= 0 || errno != EINTR)
        break;
      /* Interrupted by a signal: sleep for what is left.  */
      request = leftover;
    }
#elif defined(HAVE_USLEEP)
  /* usleep is used in preference to select.  Some systems' usleep
     cannot handle values larger than 1,000,000, so whole seconds are
     slept with sleep and only the sub-second remainder is handed to
     usleep.  */
  if (seconds >= 1)
    {
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Sleep by calling select with no descriptors and a timeout.  Note
     that this cannot be used under Windows, because Winsock's select
     doesn't implement the timeout when all fd sets are NULL.  (It does
     work under Cygwin, which has a Unix-compatible select.)

     When select returns -1 with errno set to EINTR we were
     interrupted by a signal, but there is no way of knowing how long
     we have slept, so sleeping is not resumed.  Tracking it with
     gettimeofday would be slow and unreliable due to clock skew.  */
  struct timeval tv;

  tv.tv_sec = (long) seconds;
  tv.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &tv);
#endif
}
2163
2164 #endif /* not WINDOWS */
2165
/* Encode the octets in DATA of length LENGTH to base64 format,
   storing the result to DEST.  The output will be zero-terminated,
   and must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes.  The function returns the length of
   the resulting base64 data, not counting the terminating zero.

   A LENGTH of zero (or less) produces an empty string and returns 0.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  /* Number of input bytes not yet encoded.  Counting the remainder
     (instead of comparing against an end pointer computed as
     DATA + LENGTH - 2) avoids forming a pointer that lies before the
     start of DATA when LENGTH is less than 2.  */
  int remaining = length;
  char *p = dest;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; remaining >= 3; s += 3, remaining -= 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing bytes, if any, and pad the final
     quadruplet with '='...  */
  switch (remaining)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* Nothing left over: no padding needed.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (int) (p - dest);
}
2222
/* Fetch into C the next character of the string at P that c_isspace
   does not classify as whitespace, advancing P past everything
   consumed.  C becomes \0 when the end of the string is reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

/* True if C has its high bit clear, i.e. fits a 128-entry table.  */
#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode BASE64, a null-terminated string of base64 data, into the
   memory pointed to by DEST.  DEST is assumed to be large enough to
   accommodate the decoded data, which is guaranteed to be no more
   than 3/4*strlen(base64).  Whitespace in the input is skipped.

   The decoded data is binary and is therefore not NUL-terminated.
   The return value is the number of bytes written to DEST, or -1 if
   the input is malformed (in which case part of the output may
   already have been written).

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* Value of each of the first 128 characters as a base64 digit, or
     -1 for characters that are not base64 digits ('=' included).
     Note that this assumes ASCII (but so does Wget in other
     places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
  /* True for the 64 data characters only.  It is false for '=' and
     for \0 (both map to -1 in the table), so a single test rejects
     illegal characters, misplaced padding and premature end of
     input alike.  */
#define IS_BASE64_DIGIT(c) (IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0)

  const char *src = base64;
  char *out = dest;

  for (;;)
    {
      unsigned char c;
      unsigned long group;      /* the 24 bits of the current quadruplet */

      /* First character of a quadruplet.  End of input is legitimate
         only at this point.  */
      NEXT_CHAR (c, src);
      if (c == '\0')
        break;
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* illegal char while decoding base64 */
      group = BASE64_CHAR_TO_VALUE (c) << 18;

      /* Second character; together with the first it completes the
         first output byte.  */
      NEXT_CHAR (c, src);
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* illegal char or premature EOF */
      group |= BASE64_CHAR_TO_VALUE (c) << 12;
      *out++ = group >> 16;

      /* Third character.  A '=' here must be followed by a second
         '=' and means the quadruplet carried one byte only.  */
      NEXT_CHAR (c, src);
      if (c == '=')
        {
          NEXT_CHAR (c, src);
          if (c != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* illegal char or premature EOF */
      group |= BASE64_CHAR_TO_VALUE (c) << 6;
      *out++ = 0xff & group >> 8;

      /* Fourth character.  A '=' here means the quadruplet carried
         two bytes only.  */
      NEXT_CHAR (c, src);
      if (c == '=')
        continue;
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* illegal char or premature EOF */
      group |= BASE64_CHAR_TO_VALUE (c);
      *out++ = 0xff & group;
    }
#undef IS_BASE64_DIGIT
#undef BASE64_CHAR_TO_VALUE

  return out - (char *) dest;
}
2329
#ifdef HAVE_LIBPCRE
/* Compile STR as a PCRE regular expression.  Returns the compiled
   regex, or NULL if compilation fails, in which case a message
   carrying the error text supplied by pcre_compile is printed to
   stderr.  */
void *
compile_pcre_regex (const char *str)
{
  const char *errbuf;
  int erroffset;
  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
  if (! regex)
    {
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      /* This function returns a pointer, so failure is NULL (as in
         compile_posix_regex), not the boolean `false'.  */
      return NULL;
    }
  return regex;
}
#endif
2347
/* Compile STR as a POSIX extended regular expression (REG_EXTENDED),
   without sub-match reporting (REG_NOSUB).  Returns a newly allocated
   regex_t, or NULL if compilation fails, in which case the regerror
   description is printed to stderr and nothing remains allocated.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *regex = xmalloc (sizeof *regex);
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* Calling regerror with a zero-sized buffer yields the size
         needed to hold the whole message.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* Nobody will ever see this regex_t; release it instead of
         leaking it.  */
      xfree (regex);
      return NULL;
    }

  return regex;
}
2367
#ifdef HAVE_LIBPCRE
#define OVECCOUNT 30
/* Match STR against the compiled PCRE regex REGEX.  Returns true on a
   match and false otherwise.  A pcre_exec failure other than "no
   match" is also reported as false, after logging the error code at
   LOG_VERBOSE level.  */
bool
match_pcre_regex (const void *regex, const char *str)
{
  int ovector[OVECCOUNT];
  int len = strlen (str);
  int rc = pcre_exec ((pcre *) regex, 0, str, len, 0, 0, ovector, OVECCOUNT);

  if (rc == PCRE_ERROR_NOMATCH)
    return false;
  if (rc >= 0)
    return true;

  /* Any other negative result is a genuine matching error.  */
  logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
             quote (str), rc);
  return false;
}
#undef OVECCOUNT
#endif
2391
2392 /* Matches a POSIX regex.  */
2393 bool
2394 match_posix_regex (const void *regex, const char *str)
2395 {
2396   int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
2397   if (rc == REG_NOMATCH)
2398     return false;
2399   else if (rc == 0)
2400     return true;
2401   else
2402     {
2403       int errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
2404       char *errbuf = xmalloc (errbuf_size);
2405       regerror (rc, opt.acceptregex, errbuf, errbuf_size);
2406       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2407                  quote (str), rc);
2408       xfree (errbuf);
2409       return false;
2410     }
2411 }
2412
2413 #undef IS_ASCII
2414 #undef NEXT_CHAR
2415 \f
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements of BASE with indices FROM through TO, both
   inclusive.  Each element is SIZE bytes wide, and TEMP is scratch
   space with room for at least TO + 1 elements.  CMPFUN follows the
   qsort convention.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way, rather than (to + from) / 2, so that the
         sum cannot wrap around for very large indices.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      /* Merge the two sorted halves into TEMP.  On a tie the element
         from the left half goes first, which is what keeps the sort
         stable.  */
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      /* Flush whichever half still has elements left...  */
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* ...and copy the merged range back into place.  */
      for (k = from; k <= to; k++)
        memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage for NMEMB
   elements with alloca.  Arrays with fewer than two elements (and
   zero-sized elements) need no work and are left alone.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* The test is on the element count, not the element size: an array
     of one-byte elements needs sorting like any other, whereas with
     NMEMB == 0 the NMEMB - 1 below would wrap around.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2461 \f
/* Format NUMBER compactly and return the result, which lives in a
   static buffer that is overwritten by the next call.

   Numbers whose magnitude is ten or more are rounded to an integer.
   Smaller ones are printed with one significant digit, without
   trailing zeros and with at most three fractional digits: 0.1 gives
   "0.1", 0.035 gives "0.04", 0.0091 gives "0.009", and 0.0003 gives
   plain "0".

   This is useful for displaying durations because it provides
   order-of-magnitude information without unnecessary clutter --
   long-running downloads are shown without the fractional part, and
   short ones still retain one significant digit.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  double magnitude = number >= 0 ? number : -number;

  if (!(magnitude >= 0.0005))
    {
      /* Too close to 0 to show: print "0", not "0.000".  */
      strcpy (buf, "0");
      return buf;
    }

  if (magnitude < 0.001)
    /* [0.0005, 0.001) rounds up to 0.001.  */
    snprintf (buf, sizeof buf, "%.3f", number);
  else if (magnitude < 0.95)
    snprintf (buf, sizeof buf, "%.1g", number);
  else if (magnitude < 9.95)
    snprintf (buf, sizeof buf, "%.1f", number);
  else
    /* The cutoff is 9.95 because %.1f would turn 9.96 into "10.0"
       where "10" is wanted.  OTOH 9.94 still prints as "9.9".  */
    snprintf (buf, sizeof buf, "%.0f", number);

  return buf;
}
2496
2497 #ifdef TESTING
2498
/* Unit test for subdir_p: every row gives the two arguments to pass
   and the result expected back.  Returns NULL when all rows pass;
   mu_assert is relied upon to report the first failing row.  */
const char *
test_subdir_p()
{
  struct {
    char *first;
    char *second;
    bool expected;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  int i = 0;

  while (i < countof (cases))
    {
      bool got = subdir_p (cases[i].first, cases[i].second);

      mu_assert ("test_subdir_p: wrong result",
                 got == cases[i].expected);
      ++i;
    }

  return NULL;
}
2523
/* Unit test for dir_matches_p: every row gives a NULL-terminated list
   of directory patterns, the directory to look up, and the result
   expected back.  Returns NULL when all rows pass; mu_assert is
   relied upon to report the first failing row.  */
const char *
test_dir_matches_p()
{
  struct {
    char *patterns[3];
    char *dir;
    bool expected;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  int i = 0;

  while (i < countof (cases))
    {
      bool got = dir_matches_p (cases[i].patterns, cases[i].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == cases[i].expected);
      ++i;
    }

  return NULL;
}
2560
2561 #endif /* TESTING */
2562