sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #include <errno.h>
  46 #include <fcntl.h>
  47 #include <assert.h>
  48 #include <stdarg.h>
  49 #include <locale.h>
  50
  51 #if HAVE_UTIME
  52 # include <sys/types.h>
  53 # ifdef HAVE_UTIME_H
  54 #  include <utime.h>
  55 # endif
  56
  57 # ifdef HAVE_SYS_UTIME_H
  58 #  include <sys/utime.h>
  59 # endif
  60 #endif
  61
  62 #include <sys/time.h>
  63
  64 #include <sys/stat.h>
  65
  66 /* For TIOCGWINSZ and friends: */
  67 #include <sys/ioctl.h>
  68 #ifdef HAVE_TERMIOS_H
  69 # include <termios.h>
  70 #endif
  71
  72 /* Needed for Unix version of run_with_timeout. */
  73 #include <signal.h>
  74 #include <setjmp.h>
  75
  76 #include <regex.h>
  77 #ifdef HAVE_LIBPCRE
  78 # include <pcre.h>
  79 #endif
  80
  81 #ifndef HAVE_SIGSETJMP
  82 /* If sigsetjmp is a macro, configure won't pick it up. */
  83 # ifdef sigsetjmp
  84 #  define HAVE_SIGSETJMP
  85 # endif
  86 #endif
  87
  88 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  89 # define USE_SIGNAL_TIMEOUT
  90 #endif
  91
  92 #include "utils.h"
  93 #include "hash.h"
  94
  95 #ifdef __VMS
  96 #include "vms.h"
  97 #endif /* def __VMS */
  98
  99 #ifdef TESTING
 100 #include "test.h"
 101 #endif
 102
 103 static void
 104 memfatal (const char *context, long attempted_size)
 105 {
 106   /* Make sure we don't try to store part of the log line, and thus
 107      call malloc.  */
 108   log_set_save_context (false);
 109
 110   /* We have different log outputs in different situations:
 111      1) output without bytes information
 112      2) output with bytes information  */
 113   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 114     {
 115       logprintf (LOG_ALWAYS,
 116                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 117                  exec_name, context);
 118     }
 119   else
 120     {
 121       logprintf (LOG_ALWAYS,
 122                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 123                  exec_name, context, attempted_size);
 124     }
 125
 126   exit (1);
 127 }
 128
 129 /* Character property table for (re-)escaping VMS ODS5 extended file
 130    names.  Note that this table ignores Unicode.
 131
 132    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 133
 134    ODS5 Invalid characters:
 135       C0 control codes (0x00 to 0x1F inclusive)
 136       Asterisk (*)
 137       Question mark (?)
 138
 139    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 140       Double quotation marks (")
 141       Backslash (\)
 142       Colon (:)
 143       Left angle bracket (<)
 144       Right angle bracket (>)
 145       Slash (/)
 146       Vertical bar (|)
 147
 148    Characters escaped by "^":
 149       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 150        @  [  \  ]  ^  `  {  |  }  ~
 151
 152    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 153    case.  Note that un-escaped < and > can also confuse a directory
 154    spec.
 155
 156    Characters put out as ^xx:
 157       7F (DEL)
 158       80-9F (C1 control characters)
 159       A0 (nonbreaking space)
 160       FF (Latin small letter y diaeresis)
 161
 162    Other cases:
 163       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 164
 165     Property table values:
 166       Normal escape:    1
 167       Space:            2
 168       Dot:              4
 169       Hex-hex escape:   8
 170       ODS2 normal:     16
 171       ODS2 lower case: 32
 172       Hex digit:       64
 173 */
 174
/* One property byte per 8-bit character code; each entry is the sum
   of the "Property table values" bits listed above (e.g. 80 = ODS2
   normal + hex digit, 96 = ODS2 lower case + hex digit).  */
unsigned char char_prop[ 256] = {

/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,

/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,

/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,

/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,

/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,

/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,

/* 0x80 - 0x9F: C1 control characters, all hex-hex escaped.  */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0xA0 (nonbreaking space) is hex-hex escaped; 0xA1 - 0xAF carry no
   property bits.  */
    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0 - 0xEF: no property bits.  */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0 - 0xFF: only 0xFF (Latin small letter y diaeresis) is hex-hex
   escaped.  */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
};
 210
 211 /* Utility function: like xstrdup(), but also lowercases S.  */
 212
 213 char *
 214 xstrdup_lower (const char *s)
 215 {
 216   char *copy = xstrdup (s);
 217   char *p = copy;
 218   for (; *p; p++)
 219     *p = c_tolower (*p);
 220   return copy;
 221 }
 222
 223 /* Copy the string formed by two pointers (one on the beginning, other
 224    on the char after the last char) to a new, malloc-ed location.
 225    0-terminate it.  */
 226 char *
 227 strdupdelim (const char *beg, const char *end)
 228 {
 229   char *res = xmalloc (end - beg + 1);
 230   memcpy (res, beg, end - beg);
 231   res[end - beg] = '\0';
 232   return res;
 233 }
 234
 235 /* Parse a string containing comma-separated elements, and return a
 236    vector of char pointers with the elements.  Spaces following the
 237    commas are ignored.  */
 238 char **
 239 sepstring (const char *s)
 240 {
 241   char **res;
 242   const char *p;
 243   int i = 0;
 244
 245   if (!s || !*s)
 246     return NULL;
 247   res = NULL;
 248   p = s;
 249   while (*s)
 250     {
 251       if (*s == ',')
 252         {
 253           res = xrealloc (res, (i + 2) * sizeof (char *));
 254           res[i] = strdupdelim (p, s);
 255           res[++i] = NULL;
 256           ++s;
 257           /* Skip the blanks following the ','.  */
 258           while (c_isspace (*s))
 259             ++s;
 260           p = s;
 261         }
 262       else
 263         ++s;
 264     }
 265   res = xrealloc (res, (i + 2) * sizeof (char *));
 266   res[i] = strdupdelim (p, s);
 267   res[i + 1] = NULL;
 268   return res;
 269 }
 270 \f
 271 /* Like sprintf, but prints into a string of sufficient size freshly
 272    allocated with malloc, which is returned.  If unable to print due
 273    to invalid format, returns NULL.  Inability to allocate needed
 274    memory results in abort, as with xmalloc.  This is in spirit
 275    similar to the GNU/BSD extension asprintf, but somewhat easier to
 276    use.
 277
 278    Internally the function either calls vasprintf or loops around
 279    vsnprintf until the correct size is found.  Since Wget also ships a
 280    fallback implementation of vsnprintf, this should be portable.  */
 281
/* This constant limits the memory allocated for the text buffer.
   It applies when vasprintf is not available on the system and
   vsnprintf returns -1 when a long line is truncated (as in old
   versions of glibc and on other systems that lack C99 support).  */
 286
 287 #define FMT_MAX_LENGTH 1048576
 288
 289 char *
 290 aprintf (const char *fmt, ...)
 291 {
 292 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 293   /* Use vasprintf. */
 294   int ret;
 295   va_list args;
 296   char *str;
 297   va_start (args, fmt);
 298   ret = vasprintf (&str, fmt, args);
 299   va_end (args);
 300   if (ret < 0 && errno == ENOMEM)
 301     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 302                                                       with xmalloc/xrealloc */
 303   else if (ret < 0)
 304     return NULL;
 305   return str;
 306 #else  /* not HAVE_VASPRINTF */
 307
 308   /* vasprintf is unavailable.  snprintf into a small buffer and
 309      resize it as necessary. */
 310   int size = 32;
 311   char *str = xmalloc (size);
 312
 313   /* #### This code will infloop and eventually abort in xrealloc if
 314      passed a FMT that causes snprintf to consistently return -1.  */
 315
 316   while (1)
 317     {
 318       int n;
 319       va_list args;
 320
 321       va_start (args, fmt);
 322       n = vsnprintf (str, size, fmt, args);
 323       va_end (args);
 324
 325       /* If the printing worked, return the string. */
 326       if (n > -1 && n < size)
 327         return str;
 328
 329       /* Else try again with a larger buffer. */
 330       if (n > -1)               /* C99 */
 331         size = n + 1;           /* precisely what is needed */
 332       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 333         {                               /* maybe we have some wrong
 334                                            format string? */
 335           logprintf (LOG_ALWAYS,
 336                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 337                        "aborting.\n"),
 338                      exec_name, size);  /* printout a log message */
 339           abort ();                     /* and abort... */
 340         }
 341       else
 342         {
 343           /* else, we continue to grow our
 344            * buffer: Twice the old size. */
 345           size <<= 1;
 346         }
 347       str = xrealloc (str, size);
 348     }
 349 #endif /* not HAVE_VASPRINTF */
 350 }
 351
 352 /* Concatenate the NULL-terminated list of string arguments into
 353    freshly allocated space.  */
 354
 355 char *
 356 concat_strings (const char *str0, ...)
 357 {
 358   va_list args;
 359   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 360   char *ret, *p;
 361
 362   const char *next_str;
 363   int total_length = 0;
 364   size_t argcount;
 365
 366   /* Calculate the length of and allocate the resulting string. */
 367
 368   argcount = 0;
 369   va_start (args, str0);
 370   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 371     {
 372       int len = strlen (next_str);
 373       if (argcount < countof (saved_lengths))
 374         saved_lengths[argcount++] = len;
 375       total_length += len;
 376     }
 377   va_end (args);
 378   p = ret = xmalloc (total_length + 1);
 379
 380   /* Copy the strings into the allocated space. */
 381
 382   argcount = 0;
 383   va_start (args, str0);
 384   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 385     {
 386       int len;
 387       if (argcount < countof (saved_lengths))
 388         len = saved_lengths[argcount++];
 389       else
 390         len = strlen (next_str);
 391       memcpy (p, next_str, len);
 392       p += len;
 393     }
 394   va_end (args);
 395   *p = '\0';
 396
 397   return ret;
 398 }
 399 \f
 400 /* Format the provided time according to the specified format.  The
 401    format is a string with format elements supported by strftime.  */
 402
 403 static char *
 404 fmttime (time_t t, const char *fmt)
 405 {
 406   static char output[32];
 407   struct tm *tm = localtime(&t);
 408   if (!tm)
 409     abort ();
 410   if (!strftime(output, sizeof(output), fmt, tm))
 411     abort ();
 412   return output;
 413 }
 414
 415 /* Return pointer to a static char[] buffer in which zero-terminated
 416    string-representation of TM (in form hh:mm:ss) is printed.
 417
 418    If TM is NULL, the current time will be used.  */
 419
 420 char *
 421 time_str (time_t t)
 422 {
 423   return fmttime(t, "%H:%M:%S");
 424 }
 425
 426 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 427
 428 char *
 429 datetime_str (time_t t)
 430 {
 431   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 432 }
 433 \f
 434 /* The Windows versions of the following two functions are defined in
 435    mswindows.c. On MSDOS this function should never be called. */
 436
 437 #ifdef __VMS
 438
 439 void
 440 fork_to_background (void)
 441 {
 442   return;
 443 }
 444
 445 #else /* def __VMS */
 446
 447 #if !defined(WINDOWS) && !defined(MSDOS)
 448 void
 449 fork_to_background (void)
 450 {
 451   pid_t pid;
 452   /* Whether we arrange our own version of opt.lfilename here.  */
 453   bool logfile_changed = false;
 454
 455   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 456     {
 457       /* We must create the file immediately to avoid either a race
 458          condition (which arises from using unique_name and failing to
 459          use fopen_excl) or lying to the user about the log file name
 460          (which arises from using unique_name, printing the name, and
 461          using fopen_excl later on.)  */
 462       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 463       if (new_log_fp)
 464         {
 465           logfile_changed = true;
 466           fclose (new_log_fp);
 467         }
 468     }
 469   pid = fork ();
 470   if (pid < 0)
 471     {
 472       /* parent, error */
 473       perror ("fork");
 474       exit (1);
 475     }
 476   else if (pid != 0)
 477     {
 478       /* parent, no error */
 479       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 480       if (logfile_changed)
 481         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 482       exit (0);                 /* #### should we use _exit()? */
 483     }
 484
 485   /* child: give up the privileges and keep running. */
 486   setsid ();
 487   freopen ("/dev/null", "r", stdin);
 488   freopen ("/dev/null", "w", stdout);
 489   freopen ("/dev/null", "w", stderr);
 490 }
 491 #endif /* !WINDOWS && !MSDOS */
 492
 493 #endif /* def __VMS [else] */
 494
 495 \f
 496 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 497    specified with TM.  The atime ("access time") is set to the current
 498    time.  */
 499
 500 void
 501 touch (const char *file, time_t tm)
 502 {
 503 #if HAVE_UTIME
 504 # ifdef HAVE_STRUCT_UTIMBUF
 505   struct utimbuf times;
 506 # else
 507   struct {
 508     time_t actime;
 509     time_t modtime;
 510   } times;
 511 # endif
 512   times.modtime = tm;
 513   times.actime = time (NULL);
 514   if (utime (file, &times) == -1)
 515     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 516 #else
 517   struct timespec timespecs[2];
 518   int fd;
 519
 520   fd = open (file, O_WRONLY);
 521   if (fd < 0)
 522     {
 523       logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
 524       return;
 525     }
 526
 527   timespecs[0].tv_sec = time (NULL);
 528   timespecs[0].tv_nsec = 0L;
 529   timespecs[1].tv_sec = tm;
 530   timespecs[1].tv_nsec = 0L;
 531
 532   if (futimens (fd, timespecs) == -1)
 533     logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
 534
 535   close (fd);
 536 #endif
 537 }
 538
 539 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 540    nothing under MS-Windows.  */
 541 int
 542 remove_link (const char *file)
 543 {
 544   int err = 0;
 545   struct_stat st;
 546
 547   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 548     {
 549       DEBUGP (("Unlinking %s (symlink).\n", file));
 550       err = unlink (file);
 551       if (err != 0)
 552         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 553                    quote (file), strerror (errno));
 554     }
 555   return err;
 556 }
 557
 558 /* Does FILENAME exist?  This is quite a lousy implementation, since
 559    it supplies no error codes -- only a yes-or-no answer.  Thus it
 560    will return that a file does not exist if, e.g., the directory is
 561    unreadable.  I don't mind it too much currently, though.  The
 562    proper way should, of course, be to have a third, error state,
 563    other than true/false, but that would introduce uncalled-for
 564    additional complexity to the callers.  */
 565 bool
 566 file_exists_p (const char *filename)
 567 {
 568 #ifdef HAVE_ACCESS
 569   return access (filename, F_OK) >= 0;
 570 #else
 571   struct_stat buf;
 572   return stat (filename, &buf) >= 0;
 573 #endif
 574 }
 575
 576 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 577    Returns 0 on error.  */
 578 bool
 579 file_non_directory_p (const char *path)
 580 {
 581   struct_stat buf;
 582   /* Use lstat() rather than stat() so that symbolic links pointing to
 583      directories can be identified correctly.  */
 584   if (lstat (path, &buf) != 0)
 585     return false;
 586   return S_ISDIR (buf.st_mode) ? false : true;
 587 }
 588
 589 /* Return the size of file named by FILENAME, or -1 if it cannot be
 590    opened or seeked into. */
 591 wgint
 592 file_size (const char *filename)
 593 {
 594 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 595   wgint size;
 596   /* We use fseek rather than stat to determine the file size because
 597      that way we can also verify that the file is readable without
 598      explicitly checking for permissions.  Inspired by the POST patch
 599      by Arnaud Wylie.  */
 600   FILE *fp = fopen (filename, "rb");
 601   if (!fp)
 602     return -1;
 603   fseeko (fp, 0, SEEK_END);
 604   size = ftello (fp);
 605   fclose (fp);
 606   return size;
 607 #else
 608   struct_stat st;
 609   if (stat (filename, &st) < 0)
 610     return -1;
 611   return st.st_size;
 612 #endif
 613 }
 614
 615 /* 2005-02-19 SMS.
 616    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 617    original name.  With the VMS file systems' versioning, everything
 618    should be fine, and appending ".NN" just causes trouble.
 619 */
 620
 621 #ifdef UNIQ_SEP
 622
 623 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 624    doesn't exist is found.  Return a freshly allocated copy of the
 625    unused file name.  */
 626
 627 static char *
 628 unique_name_1 (const char *prefix)
 629 {
 630   int count = 1;
 631   int plen = strlen (prefix);
 632   char *template = (char *)alloca (plen + 1 + 24);
 633   char *template_tail = template + plen;
 634
 635   memcpy (template, prefix, plen);
 636   *template_tail++ = UNIQ_SEP;
 637
 638   do
 639     number_to_string (template_tail, count++);
 640   while (file_exists_p (template));
 641
 642   return xstrdup (template);
 643 }
 644
 645 /* Return a unique file name, based on FILE.
 646
 647    More precisely, if FILE doesn't exist, it is returned unmodified.
 648    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 649    file name that doesn't exist is returned.
 650
 651    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 652
 653    The resulting file is not created, only verified that it didn't
 654    exist at the point in time when the function was called.
 655    Therefore, where security matters, don't rely that the file created
 656    by this function exists until you open it with O_EXCL or
 657    equivalent.
 658
 659    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 660    string.  Otherwise, it may return FILE if the file doesn't exist
 661    (and therefore doesn't need changing).  */
 662
 663 char *
 664 unique_name (const char *file, bool allow_passthrough)
 665 {
 666   /* If the FILE itself doesn't exist, return it without
 667      modification. */
 668   if (!file_exists_p (file))
 669     return allow_passthrough ? (char *)file : xstrdup (file);
 670
 671   /* Otherwise, find a numeric suffix that results in unused file name
 672      and return it.  */
 673   return unique_name_1 (file);
 674 }
 675
 676 #else /* def UNIQ_SEP */
 677
 678 /* Dummy unique_name() for VMS.  Return the original name as easily as
 679    possible.
 680 */
 681 char *
 682 unique_name (const char *file, bool allow_passthrough)
 683 {
 684   /* Return the FILE itself, without modification, irregardful. */
 685   return allow_passthrough ? (char *)file : xstrdup (file);
 686 }
 687
 688 #endif /* def UNIQ_SEP [else] */
 689
 690 /* Create a file based on NAME, except without overwriting an existing
 691    file with that name.  Providing O_EXCL is correctly implemented,
 692    this function does not have the race condition associated with
 693    opening the file returned by unique_name.  */
 694
 695 FILE *
 696 unique_create (const char *name, bool binary, char **opened_name)
 697 {
 698   /* unique file name, based on NAME */
 699   char *uname = unique_name (name, false);
 700   FILE *fp;
 701   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 702     {
 703       xfree (uname);
 704       uname = unique_name (name, false);
 705     }
 706   if (opened_name && fp != NULL)
 707     {
 708       if (fp)
 709         *opened_name = uname;
 710       else
 711         {
 712           *opened_name = NULL;
 713           xfree (uname);
 714         }
 715     }
 716   else
 717     xfree (uname);
 718   return fp;
 719 }
 720
 721 /* Open the file for writing, with the addition that the file is
 722    opened "exclusively".  This means that, if the file already exists,
 723    this function will *fail* and errno will be set to EEXIST.  If
 724    BINARY is set, the file will be opened in binary mode, equivalent
 725    to fopen's "wb".
 726
 727    If opening the file fails for any reason, including the file having
 728    previously existed, this function returns NULL and sets errno
 729    appropriately.  */
 730
 731 FILE *
 732 fopen_excl (const char *fname, int binary)
 733 {
 734   int fd;
 735 #ifdef O_EXCL
 736
 737 /* 2005-04-14 SMS.
 738    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 739    It also has file versions which obviate all the O_EXCL effort.
 740    O_TRUNC (something of a misnomer) requests a new version.
 741 */
 742 # ifdef __VMS
 743 /* Common open() optional arguments:
 744    sequential access only, access callback function.
 745 */
 746 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 747
 748   int open_id;
 749   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 750
 751   if (binary > 1)
 752     {
 753       open_id = 11;
 754       fd = open( fname,                 /* File name. */
 755        flags,                           /* Flags. */
 756        0777,                            /* Mode for default protection. */
 757        "ctx=bin,stm",                   /* Binary, stream access. */
 758        "rfm=stmlf",                     /* Stream_LF. */
 759        OPEN_OPT_ARGS);                  /* Access callback. */
 760     }
 761   else if (binary)
 762     {
 763       open_id = 12;
 764       fd = open( fname,                 /* File name. */
 765        flags,                           /* Flags. */
 766        0777,                            /* Mode for default protection. */
 767        "ctx=bin,stm",                   /* Binary, stream access. */
 768        "rfm=fix",                       /* Fixed-length, */
 769        "mrs=512",                       /* 512-byte records. */
 770        OPEN_OPT_ARGS);                  /* Access callback. */
 771     }
 772   else
 773     {
 774       open_id = 13;
 775       fd = open( fname,                 /* File name. */
 776        flags,                           /* Flags. */
 777        0777,                            /* Mode for default protection. */
 778        "rfm=stmlf",                     /* Stream_LF. */
 779        OPEN_OPT_ARGS);                  /* Access callback. */
 780     }
 781 # else /* def __VMS */
 782   int flags = O_WRONLY | O_CREAT | O_EXCL;
 783 # ifdef O_BINARY
 784   if (binary)
 785     flags |= O_BINARY;
 786 # endif
 787   fd = open (fname, flags, 0666);
 788 # endif /* def __VMS [else] */
 789
 790   if (fd < 0)
 791     return NULL;
 792   return fdopen (fd, binary ? "wb" : "w");
 793 #else  /* not O_EXCL */
 794   /* Manually check whether the file exists.  This is prone to race
 795      conditions, but systems without O_EXCL haven't deserved
 796      better.  */
 797   if (file_exists_p (fname))
 798     {
 799       errno = EEXIST;
 800       return NULL;
 801     }
 802   return fopen (fname, binary ? "wb" : "w");
 803 #endif /* not O_EXCL */
 804 }
 805 \f
 806 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 807    are missing, create them first.  In case any mkdir() call fails,
 808    return its error status.  Returns 0 on successful completion.
 809
 810    The behaviour of this function should be identical to the behaviour
 811    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 812 int
 813 make_directory (const char *directory)
 814 {
 815   int i, ret, quit = 0;
 816   char *dir;
 817
 818   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 819      function is unsafe if called with a read-only char *argument.  */
 820   STRDUP_ALLOCA (dir, directory);
 821
 822   /* If the first character of dir is '/', skip it (and thus enable
 823      creation of absolute-pathname directories.  */
 824   for (i = (*dir == '/'); 1; ++i)
 825     {
 826       for (; dir[i] && dir[i] != '/'; i++)
 827         ;
 828       if (!dir[i])
 829         quit = 1;
 830       dir[i] = '\0';
 831       /* Check whether the directory already exists.  Allow creation of
 832          of intermediate directories to fail, as the initial path components
 833          are not necessarily directories!  */
 834       if (!file_exists_p (dir))
 835         ret = mkdir (dir, 0777);
 836       else
 837         ret = 0;
 838       if (quit)
 839         break;
 840       else
 841         dir[i] = '/';
 842     }
 843   return ret;
 844 }
 845
 846 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 847    should be a file name.
 848
 849    file_merge("/foo/bar", "baz")  => "/foo/baz"
 850    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 851    file_merge("foo", "bar")       => "bar"
 852
 853    In other words, it's a simpler and gentler version of uri_merge.  */
 854
 855 char *
 856 file_merge (const char *base, const char *file)
 857 {
 858   char *result;
 859   const char *cut = (const char *)strrchr (base, '/');
 860
 861   if (!cut)
 862     return xstrdup (file);
 863
 864   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 865   memcpy (result, base, cut - base);
 866   result[cut - base] = '/';
 867   strcpy (result + (cut - base) + 1, file);
 868
 869   return result;
 870 }
 871 \f
 872 /* Like fnmatch, but performs a case-insensitive match.  */
 873
 874 int
 875 fnmatch_nocase (const char *pattern, const char *string, int flags)
 876 {
 877 #ifdef FNM_CASEFOLD
 878   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 879      also present on *BSD platforms, and possibly elsewhere.  */
 880   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 881 #else
 882   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 883   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 884   char *strcopy = (char *) alloca (strlen (string) + 1);
 885   char *p;
 886   for (p = patcopy; *pattern; pattern++, p++)
 887     *p = c_tolower (*pattern);
 888   *p = '\0';
 889   for (p = strcopy; *string; string++, p++)
 890     *p = c_tolower (*string);
 891   *p = '\0';
 892   return fnmatch (patcopy, strcopy, flags);
 893 #endif
 894 }
 895
 896 static bool in_acclist (const char *const *, const char *, bool);
 897
 898 /* Determine whether a file is acceptable to be followed, according to
 899    lists of patterns to accept/reject.  */
 900 bool
 901 acceptable (const char *s)
 902 {
 903   const char *p;
 904
 905   if (opt.output_document && strcmp (s, opt.output_document) == 0)
 906     return true;
 907
 908   if ((p = strrchr (s, '/')))
 909     s = p + 1;
 910
 911   if (opt.accepts)
 912     {
 913       if (opt.rejects)
 914         return (in_acclist ((const char *const *)opt.accepts, s, true)
 915                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 916       else
 917         return in_acclist ((const char *const *)opt.accepts, s, true);
 918     }
 919   else if (opt.rejects)
 920     return !in_acclist ((const char *const *)opt.rejects, s, true);
 921
 922   return true;
 923 }
 924
 925 /* Determine whether an URL is acceptable to be followed, according to
 926    regex patterns to accept/reject.  */
 927 bool
 928 accept_url (const char *s)
 929 {
 930   if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
 931     return false;
 932   if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
 933     return false;
 934
 935   return true;
 936 }
 937
 938 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 939    will return true if and only if D2 begins with `/something/' or is exactly
 940    '/something'.  */
 941 bool
 942 subdir_p (const char *d1, const char *d2)
 943 {
 944   if (*d1 == '\0')
 945     return true;
 946   if (!opt.ignore_case)
 947     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 948       ;
 949   else
 950     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 951       ;
 952
 953   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 954 }
 955
 956 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 957    first element that matches DIR, through wildcards or front comparison (as
 958    appropriate).  */
 959 static bool
 960 dir_matches_p (char **dirlist, const char *dir)
 961 {
 962   char **x;
 963   int (*matcher) (const char *, const char *, int)
 964     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 965
 966   for (x = dirlist; *x; x++)
 967     {
 968       /* Remove leading '/' */
 969       char *p = *x + (**x == '/');
 970       if (has_wildcards_p (p))
 971         {
 972           if (matcher (p, dir, FNM_PATHNAME) == 0)
 973             break;
 974         }
 975       else
 976         {
 977           if (subdir_p (p, dir))
 978             break;
 979         }
 980     }
 981
 982   return *x ? true : false;
 983 }
 984
 985 /* Returns whether DIRECTORY is acceptable for download, wrt the
 986    include/exclude lists.
 987
 988    The leading `/' is ignored in paths; relative and absolute paths
 989    may be freely intermixed.  */
 990
 991 bool
 992 accdir (const char *directory)
 993 {
 994   /* Remove starting '/'.  */
 995   if (*directory == '/')
 996     ++directory;
 997   if (opt.includes)
 998     {
 999       if (!dir_matches_p (opt.includes, directory))
1000         return false;
1001     }
1002   if (opt.excludes)
1003     {
1004       if (dir_matches_p (opt.excludes, directory))
1005         return false;
1006     }
1007   return true;
1008 }
1009
/* Return true if STRING ends with TAIL.  For instance:

   match_tail ("abc", "bc", false)  -> true
   match_tail ("abc", "ab", false)  -> false
   match_tail ("abc", "abc", false) -> true
   match_tail ("bc", "abc", false)  -> false (TAIL longer than STRING)

   An empty TAIL matches every STRING.  If FOLD_CASE is true, the
   comparison is case-insensitive.  */

bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);
  const char *end;

  if (tail_len > string_len)
    return false;  /* tail is longer than string.  */

  /* The last TAIL_LEN characters of STRING.  */
  end = string + (string_len - tail_len);

  /* strcmp and strcasecmp return 0 when the strings are equal, so the
     result has to be compared against 0; returning it directly would
     report a match exactly when the tail differs.  */
  if (fold_case)
    return strcasecmp (end, tail) == 0;
  return strcmp (end, tail) == 0;
}
1031
1032 /* Checks whether string S matches each element of ACCEPTS.  A list
1033    element are matched either with fnmatch() or match_tail(),
1034    according to whether the element contains wildcards or not.
1035
1036    If the BACKWARD is false, don't do backward comparison -- just compare
1037    them normally.  */
1038 static bool
1039 in_acclist (const char *const *accepts, const char *s, bool backward)
1040 {
1041   for (; *accepts; accepts++)
1042     {
1043       if (has_wildcards_p (*accepts))
1044         {
1045           int res = opt.ignore_case
1046             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1047           /* fnmatch returns 0 if the pattern *does* match the string.  */
1048           if (res == 0)
1049             return true;
1050         }
1051       else
1052         {
1053           if (backward)
1054             {
1055               if (match_tail (s, *accepts, opt.ignore_case))
1056                 return true;
1057             }
1058           else
1059             {
1060               int cmp = opt.ignore_case
1061                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1062               if (cmp == 0)
1063                 return true;
1064             }
1065         }
1066     }
1067   return false;
1068 }
1069
/* Return a pointer to the suffix (file extension) of STR, i.e. to the
   character following the last '.', or NULL when STR has no '.' or
   when a '/' follows the last '.'.  The result points into STR.
   Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  char *dot = strrchr (str, '.');

  if (dot == NULL)
    return NULL;

  /* A '/' after the last dot means the dot is part of a directory
     name, not of the file name.  */
  if (strchr (dot + 1, '/') != NULL)
    return NULL;

  return dot + 1;
}
1085
/* Return true if S contains at least one globbing wildcard character
   (`*', `?', `[' or `]').  */

bool
has_wildcards_p (const char *s)
{
  /* strcspn stops at the first wildcard, or at the terminating NUL
     when there is none.  */
  return s[strcspn (s, "*?[]")] != '\0';
}
1094
/* Return true if FNAME ends with a typical HTML suffix.  The
   following suffixes, compared case-insensitively, are presumed to
   denote HTML files:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return strcasecmp (ext, "html") == 0
    || strcasecmp (ext, "htm") == 0
    /* Any single character followed by "html".  */
    || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0);
}
1120
/* Read one line from FP and return it in freshly allocated storage,
   which the caller should release with free() when done.

   The line length is limited only by available memory.  The newline
   that ends the line is kept, and the result is zero-terminated.

   NULL is returned when end-of-file is hit before anything was read,
   and also on error; use the stdio function ferror() to tell the two
   apart.  */

char *
read_whole_line (FILE *fp)
{
  int used = 0;                 /* characters stored so far, sans \0 */
  int capacity = 82;            /* bytes currently allocated */
  char *line = xmalloc (capacity);

  while (fgets (line + used, capacity - used, fp) != NULL)
    {
      used += strlen (line + used);

      /* Nothing visible was added, which can happen for example with
         a binary file where a line begins with \0.  Read again.  */
      if (used == 0)
        continue;

      if (line[used - 1] == '\n')
        break;

      /* No newline yet: fgets() stopped because it ran out of the
         space we gave it, so double the buffer unconditionally.  */
      capacity *= 2;
      line = xrealloc (line, capacity);
    }

  if (used == 0 || ferror (fp))
    {
      xfree (line);
      return NULL;
    }

  /* Give back what the exponential growth over-allocated.  The `+ 1'
     accounts for the terminating \0, which USED does not count and
     which fgets() has already stored.  */
  if (used + 1 < capacity)
    line = xrealloc (line, used + 1);

  return line;
}
1170 \f
1171 /* Read FILE into memory.  A pointer to `struct file_memory' are
1172    returned; use struct element `content' to access file contents, and
1173    the element `length' to know the file length.  `content' is *not*
1174    zero-terminated, and you should *not* read or write beyond the [0,
1175    length) range of characters.
1176
1177    After you are done with the file contents, call wget_read_file_free to
1178    release the memory.
1179
1180    Depending on the operating system and the type of file that is
1181    being read, wget_read_file() either mmap's the file into memory, or
1182    reads the file into the core using read().
1183
1184    If file is named "-", fileno(stdin) is used for reading instead.
1185    If you want to read from a real file named "-", use "./-" instead.  */
1186
1187 struct file_memory *
1188 wget_read_file (const char *file)
1189 {
1190   int fd;
1191   struct file_memory *fm;
1192   long size;
1193   bool inhibit_close = false;
1194
1195   /* Some magic in the finest tradition of Perl and its kin: if FILE
1196      is "-", just use stdin.  */
1197   if (HYPHENP (file))
1198     {
1199       fd = fileno (stdin);
1200       inhibit_close = true;
1201       /* Note that we don't inhibit mmap() in this case.  If stdin is
1202          redirected from a regular file, mmap() will still work.  */
1203     }
1204   else
1205     fd = open (file, O_RDONLY);
1206   if (fd < 0)
1207     return NULL;
1208   fm = xnew (struct file_memory);
1209
1210 #ifdef HAVE_MMAP
1211   {
1212     struct_fstat buf;
1213     if (fstat (fd, &buf) < 0)
1214       goto mmap_lose;
1215     fm->length = buf.st_size;
1216     /* NOTE: As far as I know, the callers of this function never
1217        modify the file text.  Relying on this would enable us to
1218        specify PROT_READ and MAP_SHARED for a marginal gain in
1219        efficiency, but at some cost to generality.  */
1220     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1221                         MAP_PRIVATE, fd, 0);
1222     if (fm->content == (char *)MAP_FAILED)
1223       goto mmap_lose;
1224     if (!inhibit_close)
1225       close (fd);
1226
1227     fm->mmap_p = 1;
1228     return fm;
1229   }
1230
1231  mmap_lose:
1232   /* The most common reason why mmap() fails is that FD does not point
1233      to a plain file.  However, it's also possible that mmap() doesn't
1234      work for a particular type of file.  Therefore, whenever mmap()
1235      fails, we just fall back to the regular method.  */
1236 #endif /* HAVE_MMAP */
1237
1238   fm->length = 0;
1239   size = 512;                   /* number of bytes fm->contents can
1240                                    hold at any given time. */
1241   fm->content = xmalloc (size);
1242   while (1)
1243     {
1244       wgint nread;
1245       if (fm->length > size / 2)
1246         {
1247           /* #### I'm not sure whether the whole exponential-growth
1248              thing makes sense with kernel read.  On Linux at least,
1249              read() refuses to read more than 4K from a file at a
1250              single chunk anyway.  But other Unixes might optimize it
1251              better, and it doesn't *hurt* anything, so I'm leaving
1252              it.  */
1253
1254           /* Normally, we grow SIZE exponentially to make the number
1255              of calls to read() and realloc() logarithmic in relation
1256              to file size.  However, read() can read an amount of data
1257              smaller than requested, and it would be unreasonable to
1258              double SIZE every time *something* was read.  Therefore,
1259              we double SIZE only when the length exceeds half of the
1260              entire allocated size.  */
1261           size <<= 1;
1262           fm->content = xrealloc (fm->content, size);
1263         }
1264       nread = read (fd, fm->content + fm->length, size - fm->length);
1265       if (nread > 0)
1266         /* Successful read. */
1267         fm->length += nread;
1268       else if (nread < 0)
1269         /* Error. */
1270         goto lose;
1271       else
1272         /* EOF */
1273         break;
1274     }
1275   if (!inhibit_close)
1276     close (fd);
1277   if (size > fm->length && fm->length != 0)
1278     /* Due to exponential growth of fm->content, the allocated region
1279        might be much larger than what is actually needed.  */
1280     fm->content = xrealloc (fm->content, fm->length);
1281   fm->mmap_p = 0;
1282   return fm;
1283
1284  lose:
1285   if (!inhibit_close)
1286     close (fd);
1287   xfree (fm->content);
1288   xfree (fm);
1289   return NULL;
1290 }
1291
1292 /* Release the resources held by FM.  Specifically, this calls
1293    munmap() or xfree() on fm->content, depending whether mmap or
1294    malloc/read were used to read in the file.  It also frees the
1295    memory needed to hold the FM structure itself.  */
1296
1297 void
1298 wget_read_file_free (struct file_memory *fm)
1299 {
1300 #ifdef HAVE_MMAP
1301   if (fm->mmap_p)
1302     {
1303       munmap (fm->content, fm->length);
1304     }
1305   else
1306 #endif
1307     {
1308       xfree (fm->content);
1309     }
1310   xfree (fm);
1311 }
1312 \f
/* Free every string in the NULL-terminated vector VEC, then free the
   vector itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;

  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1326
/* Append vector V2 to vector V1 and return the merged vector.  Both
   vectors must be NULL-terminated.  V1 is reallocated and the array
   V2 is freed, so neither pointer may be used after the call; the
   strings V2 held are not freed, they now belong to the result.

   If V1 is NULL, V2 is returned; if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  size_t n1, n2;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 has no elements: nothing to copy, just discard it.  */
      xfree (v2);
      return v1;
    }

  /* Count the elements of each vector, terminators excluded.  */
  for (n1 = 0; v1[n1]; n1++)
    ;
  for (n2 = 0; v2[n2]; n2++)
    ;

  /* Grow V1 to hold both sets of elements plus the terminating NULL.
     The element size is taken from the vector itself, so it cannot
     disagree with the element type.  */
  v1 = xrealloc (v1, (n1 + n2 + 1) * sizeof *v1);
  /* Copy V2's elements together with its NULL terminator.  */
  memcpy (v1 + n1, v2, (n2 + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1357
/* Append a freshly allocated copy of STR to the NULL-terminated
   vector VEC and return the new value of the vector.  A NULL VEC is
   treated as an empty vector and is allocated as needed.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* elements currently in VEC */

  if (vec != NULL)
    while (vec[used] != NULL)
      ++used;

  /* Make room for the old elements, the new one, and the NULL.  */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));
  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1381 \f
/* String "sets" are hash tables used only to store strings as keys
   and to query for their existence.  The routines below make that
   use transparent.  */

/* Add a copy of S to the string set HT, unless HT already contains
   S.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only new elements are inserted; that way an existing key never
     has to be freed and duplicated again.

     The value stored is always the literal "1": a clear arbitrary
     value that costs no memory, since every key of every set shares
     the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1402
/* Tell whether the string set HT contains S.  This is a synonym for
   hash_table_contains, whose result is returned as is.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1410
1411 /* Convert the specified string set to array.  ARRAY should be large
1412    enough to hold hash_table_count(ht) char pointers.  */
1413
1414 void string_set_to_array (struct hash_table *ht, char **array)
1415 {
1416   hash_table_iterator iter;
1417   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1418     *array++ = iter.key;
1419 }
1420
1421 /* Free the string set.  This frees both the storage allocated for
1422    keys and the actual hash table.  (hash_table_destroy would only
1423    destroy the hash table.)  */
1424
1425 void
1426 string_set_free (struct hash_table *ht)
1427 {
1428   hash_table_iterator iter;
1429   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1430     xfree (iter.key);
1431   hash_table_destroy (ht);
1432 }
1433
1434 /* Utility function: simply call xfree() on all keys and values of HT.  */
1435
1436 void
1437 free_keys_and_values (struct hash_table *ht)
1438 {
1439   hash_table_iterator iter;
1440   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1441     {
1442       xfree (iter.key);
1443       xfree (iter.value);
1444     }
1445 }
1446 \f
/* Fetch the digit grouping data for thousand separators, obtained by
   calling localeconv().  *SEP receives the separator string and
   *GROUPING the grouping info; both are computed on the first call
   and cached for all later ones.

   In locales that don't set a thousand separator (such as the "C"
   locale), the separator is forced to "," -- or to "." when ',' is
   the decimal point -- with groups of three digits.  Thousand
   separators are only shown in one place, so this shouldn't be a
   problem in practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *cached_sep;
  static const char *cached_grouping;
  static bool initialized;

  if (!initialized)
    {
      /* Start from what the locale says.  */
      const struct lconv *info = localeconv ();
      const char *locale_sep = info->thousands_sep;
      const char *locale_grouping = info->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths can't be counted, so only a single-byte
         separator is usable; discard a longer one and let the check
         below choose the byte.  */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif

      if (*locale_sep == '\0')
        {
          /* No usable separator.  Grouping is still wanted for
             legibility, so use ',' unless that character is the
             decimal point, in which case use '.'.  */
          locale_sep = (*info->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      cached_sep = locale_sep;
      cached_grouping = locale_grouping;
      initialized = true;
    }

  *sep = cached_sep;
  *grouping = cached_grouping;
}
1492
1493 /* Return a printed representation of N with thousand separators.
1494    This should respect locale settings, with the exception of the "C"
1495    locale which mandates no separator, but we use one anyway.
1496
1497    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1498    the separators because it's too non-portable, and it's hard to test
1499    for this feature at configure time.  Besides, it wouldn't display
1500    separators in the "C" locale, still used by many Unix users.  */
1501
1502 const char *
1503 with_thousand_seps (wgint n)
1504 {
1505   static char outbuf[48];
1506   char *p = outbuf + sizeof outbuf;
1507
1508   /* Info received from locale */
1509   const char *grouping, *sep;
1510   int seplen;
1511
1512   /* State information */
1513   int i = 0, groupsize;
1514   const char *atgroup;
1515
1516   bool negative = n < 0;
1517
1518   /* Initialize grouping data. */
1519   get_grouping_data (&sep, &grouping);
1520   seplen = strlen (sep);
1521   atgroup = grouping;
1522   groupsize = *atgroup++;
1523
1524   /* This would overflow on WGINT_MIN, but printing negative numbers
1525      is not an important goal of this fuinction.  */
1526   if (negative)
1527     n = -n;
1528
1529   /* Write the number into the buffer, backwards, inserting the
1530      separators as necessary.  */
1531   *--p = '\0';
1532   while (1)
1533     {
1534       *--p = n % 10 + '0';
1535       n /= 10;
1536       if (n == 0)
1537         break;
1538       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1539       if (++i == groupsize)
1540         {
1541           if (seplen == 1)
1542             *--p = *sep;
1543           else
1544             memcpy (p -= seplen, sep, seplen);
1545           i = 0;
1546           if (*atgroup)
1547             groupsize = *atgroup++;
1548         }
1549     }
1550   if (negative)
1551     *--p = '-';
1552
1553   return p;
1554 }
1555
1556 /* N, a byte quantity, is converted to a human-readable abberviated
1557    form a la sizes printed by `ls -lh'.  The result is written to a
1558    static buffer, a pointer to which is returned.
1559
1560    Unlike `with_thousand_seps', this approximates to the nearest unit.
1561    Quoting GNU libit: "Most people visually process strings of 3-4
1562    digits effectively, but longer strings of digits are more prone to
1563    misinterpretation.  Hence, converting to an abbreviated form
1564    usually improves readability."
1565
1566    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1567    original computer-related meaning of "powers of 1024".  We don't
1568    use the "*bibyte" names invented in 1998, and seldom used in
1569    practice.  Wikipedia's entry on "binary prefix" discusses this in
1570    some detail.  */
1571
1572 char *
1573 human_readable (HR_NUMTYPE n)
1574 {
1575   /* These suffixes are compatible with those of GNU `ls -lh'. */
1576   static char powers[] =
1577     {
1578       'K',                      /* kilobyte, 2^10 bytes */
1579       'M',                      /* megabyte, 2^20 bytes */
1580       'G',                      /* gigabyte, 2^30 bytes */
1581       'T',                      /* terabyte, 2^40 bytes */
1582       'P',                      /* petabyte, 2^50 bytes */
1583       'E',                      /* exabyte,  2^60 bytes */
1584     };
1585   static char buf[8];
1586   size_t i;
1587
1588   /* If the quantity is smaller than 1K, just print it. */
1589   if (n < 1024)
1590     {
1591       snprintf (buf, sizeof (buf), "%d", (int) n);
1592       return buf;
1593     }
1594
1595   /* Loop over powers, dividing N with 1024 in each iteration.  This
1596      works unchanged for all sizes of wgint, while still avoiding
1597      non-portable `long double' arithmetic.  */
1598   for (i = 0; i < countof (powers); i++)
1599     {
1600       /* At each iteration N is greater than the *subsequent* power.
1601          That way N/1024.0 produces a decimal number in the units of
1602          *this* power.  */
1603       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1604         {
1605           double val = n / 1024.0;
1606           /* Print values smaller than 10 with one decimal digits, and
1607              others without any decimals.  */
1608           snprintf (buf, sizeof (buf), "%.*f%c",
1609                     val < 10 ? 1 : 0, val, powers[i]);
1610           return buf;
1611         }
1612       n /= 1024;
1613     }
1614   return NULL;                  /* unreached */
1615 }
1616
1617 /* Count the digits in the provided number.  Used to allocate space
1618    when printing numbers.  */
1619
1620 int
1621 numdigit (wgint number)
1622 {
1623   int cnt = 1;
1624   if (number < 0)
1625     ++cnt;                      /* accomodate '-' */
1626   while ((number /= 10) != 0)
1627     ++cnt;
1628   return cnt;
1629 }
1630
1631 #define PR(mask) *p++ = n / (mask) + '0'
1632
1633 /* DIGITS_<D> is used to print a D-digit number and should be called
1634    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1635    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1636    Recursively this continues until DIGITS_1 is invoked.  */
1637
1638 #define DIGITS_1(mask) PR (mask)
1639 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1640 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1641 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1642 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1643 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1644 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1645 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1646 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1647 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1648
1649 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1650
1651 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1652 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1653 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1654 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1655 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1656 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1657 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1658 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1659 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1660
1661 /* Shorthand for casting to wgint. */
1662 #define W wgint
1663
1664 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1665    `sprintf(buffer, "%lld", (long long) number)', only typically much
1666    faster and portable to machines without long long.
1667
1668    The speedup may make a difference in programs that frequently
1669    convert numbers to strings.  Some implementations of sprintf,
1670    particularly the one in some versions of GNU libc, have been known
1671    to be quite slow when converting integers to strings.
1672
1673    Return the pointer to the location where the terminating zero was
1674    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1675    function is done.)
1676
1677    BUFFER should be large enough to accept as many bytes as you expect
1678    the number to take up.  On machines with 64-bit wgints the maximum
1679    needed size is 24 bytes.  That includes the digits needed for the
1680    largest 64-bit number, the `-' sign in case it's negative, and the
1681    terminating '\0'.  */
1682
1683 char *
1684 number_to_string (char *buffer, wgint number)
1685 {
1686   char *p = buffer;
1687   wgint n = number;
1688
1689   int last_digit_char = 0;
1690
1691 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1692   /* We are running in a very strange environment.  Leave the correct
1693      printing to sprintf.  */
1694   p += sprintf (buf, "%j", (intmax_t) (n));
1695 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1696
1697   if (n < 0)
1698     {
1699       if (n < -WGINT_MAX)
1700         {
1701           /* n = -n would overflow because -n would evaluate to a
1702              wgint value larger than WGINT_MAX.  Need to make n
1703              smaller and handle the last digit separately.  */
1704           int last_digit = n % 10;
1705           /* The sign of n%10 is implementation-defined. */
1706           if (last_digit < 0)
1707             last_digit_char = '0' - last_digit;
1708           else
1709             last_digit_char = '0' + last_digit;
1710           /* After n is made smaller, -n will not overflow. */
1711           n /= 10;
1712         }
1713
1714       *p++ = '-';
1715       n = -n;
1716     }
1717
1718   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1719      way printing any N is fully open-coded without a loop or jump.
1720      (Also see description of DIGITS_*.)  */
1721
1722   if      (n < 10)                       DIGITS_1 (1);
1723   else if (n < 100)                      DIGITS_2 (10);
1724   else if (n < 1000)                     DIGITS_3 (100);
1725   else if (n < 10000)                    DIGITS_4 (1000);
1726   else if (n < 100000)                   DIGITS_5 (10000);
1727   else if (n < 1000000)                  DIGITS_6 (100000);
1728   else if (n < 10000000)                 DIGITS_7 (1000000);
1729   else if (n < 100000000)                DIGITS_8 (10000000);
1730   else if (n < 1000000000)               DIGITS_9 (100000000);
1731 #if SIZEOF_WGINT == 4
1732   /* wgint is 32 bits wide: no number has more than 10 digits. */
1733   else                                   DIGITS_10 (1000000000);
1734 #else
1735   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1736      Constants are constructed by compile-time multiplication to avoid
1737      dealing with different notations for 64-bit constants
1738      (nL/nLL/nI64, depending on the compiler and architecture).  */
1739   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1740   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1741   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1742   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1743   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1744   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1745   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1746   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1747   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1748   else                                   DIGITS_19 (1000000000*(W)1000000000);
1749 #endif
1750
1751   if (last_digit_char)
1752     *p++ = last_digit_char;
1753
1754   *p = '\0';
1755 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1756
1757   return p;
1758 }
1759
1760 #undef PR
1761 #undef W
1762 #undef SPRINTF_WGINT
1763 #undef DIGITS_1
1764 #undef DIGITS_2
1765 #undef DIGITS_3
1766 #undef DIGITS_4
1767 #undef DIGITS_5
1768 #undef DIGITS_6
1769 #undef DIGITS_7
1770 #undef DIGITS_8
1771 #undef DIGITS_9
1772 #undef DIGITS_10
1773 #undef DIGITS_11
1774 #undef DIGITS_12
1775 #undef DIGITS_13
1776 #undef DIGITS_14
1777 #undef DIGITS_15
1778 #undef DIGITS_16
1779 #undef DIGITS_17
1780 #undef DIGITS_18
1781 #undef DIGITS_19
1782
1783 #define RING_SIZE 3
1784
1785 /* Print NUMBER to a statically allocated string and return a pointer
1786    to the printed representation.
1787
1788    This function is intended to be used in conjunction with printf.
1789    It is hard to portably print wgint values:
1790     a) you cannot use printf("%ld", number) because wgint can be long
1791        long on 32-bit machines with LFS.
1792     b) you cannot use printf("%lld", number) because NUMBER could be
1793        long on 32-bit machines without LFS, or on 64-bit machines,
1794        which do not require LFS.  Also, Windows doesn't support %lld.
1795     c) you cannot use printf("%j", (int_max_t) number) because not all
1796        versions of printf support "%j", the most notable being the one
1797        on Windows.
1798     d) you cannot #define WGINT_FMT to the appropriate format and use
1799        printf(WGINT_FMT, number) because that would break translations
1800        for user-visible messages, such as printf("Downloaded: %d
1801        bytes\n", number).
1802
1803    What you should use instead is printf("%s", number_to_static_string
1804    (number)).
1805
1806    CAVEAT: since the function returns pointers to static data, you
1807    must be careful to copy its result before calling it again.
1808    However, to make it more useful with printf, the function maintains
1809    an internal ring of static buffers to return.  That way things like
1810    printf("%s %s", number_to_static_string (num1),
1811    number_to_static_string (num2)) work as expected.  Three buffers
1812    are currently used, which means that "%s %s %s" will work, but "%s
1813    %s %s %s" won't.  If you need to print more than three wgints,
1814    bump the RING_SIZE (or rethink your message.)  */
1815
1816 char *
1817 number_to_static_string (wgint number)
1818 {
1819   static char ring[RING_SIZE][24];
1820   static int ringpos;
1821   char *buf = ring[ringpos];
1822   number_to_string (buf, number);
1823   ringpos = (ringpos + 1) % RING_SIZE;
1824   return buf;
1825 }
1826
1827 /* Converts the byte to bits format if --report-bps option is enabled
1828  */
1829 wgint
1830 convert_to_bits (wgint num)
1831 {
1832   if (opt.report_bps)
1833     return num * 8;
1834   return num;
1835 }
1836
1837 \f
/* Determine the width, in columns, of the terminal attached to
   stderr.  Return 0 if that's not possible, and -- where TIOCGWINSZ
   is used -- also when opt.lfilename is set.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize ws;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &ws) < 0)
    return 0;                   /* most likely ENOTTY */

  return ws.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO info;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &info))
    return info.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1867 \f
1868 /* Whether the rnd system (either rand or [dl]rand48) has been
1869    seeded.  */
1870 static int rnd_seeded;
1871
1872 /* Return a random number between 0 and MAX-1, inclusive.
1873
1874    If the system does not support lrand48 and MAX is greater than the
1875    value of RAND_MAX+1 on the system, the returned value will be in
1876    the range [0, RAND_MAX].  This may be fixed in a future release.
1877    The random number generator is seeded automatically the first time
1878    it is called.
1879
1880    This uses lrand48 where available, rand elsewhere.  DO NOT use it
1881    for cryptography.  It is only meant to be used in situations where
1882    quality of the random numbers returned doesn't really matter.  */
1883
1884 int
1885 random_number (int max)
1886 {
1887 #ifdef HAVE_DRAND48
1888   if (!rnd_seeded)
1889     {
1890       srand48 ((long) time (NULL) ^ (long) getpid ());
1891       rnd_seeded = 1;
1892     }
1893   return lrand48 () % max;
1894 #else  /* not HAVE_DRAND48 */
1895
1896   double bounded;
1897   int rnd;
1898   if (!rnd_seeded)
1899     {
1900       srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1901       rnd_seeded = 1;
1902     }
1903   rnd = rand ();
1904
1905   /* Like rand() % max, but uses the high-order bits for better
1906      randomness on architectures where rand() is implemented using a
1907      simple congruential generator.  */
1908
1909   bounded = (double) max * rnd / (RAND_MAX + 1.0);
1910   return (int) bounded;
1911
1912 #endif /* not HAVE_DRAND48 */
1913 }
1914
/* Produce a uniformly distributed pseudo-random floating point
   number in [0, 1).  drand48 does the work where it exists; elsewhere
   the value is pieced together from four random_number draws, which
   is admittedly a lame kludge.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      rnd_seeded = 1;
      srand48 ((long) time (NULL) ^ (long) getpid ());
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Each draw supplies four further decimal digits: the divisor
     walks through 1e4, 1e8, 1e12 and 1e16.  */
  double result = 0.0;
  double divisor = 1.0;
  int draw;

  for (draw = 0; draw < 4; draw++)
    {
      divisor *= 10000.0;
      result += random_number (10000) / divisor;
    }
  return result;
#endif /* not HAVE_DRAND48 */
}
1936 \f
1937 /* Implementation of run_with_timeout, a generic timeout-forcing
1938    routine for systems with Unix-like signal handling.  */
1939
1940 #ifdef USE_SIGNAL_TIMEOUT
1941 # ifdef HAVE_SIGSETJMP
1942 #  define SETJMP(env) sigsetjmp (env, 1)
1943
1944 static sigjmp_buf run_with_timeout_env;
1945
1946 static void
1947 abort_run_with_timeout (int sig)
1948 {
1949   assert (sig == SIGALRM);
1950   siglongjmp (run_with_timeout_env, -1);
1951 }
1952 # else /* not HAVE_SIGSETJMP */
1953 #  define SETJMP(env) setjmp (env)
1954
1955 static jmp_buf run_with_timeout_env;
1956
1957 static void
1958 abort_run_with_timeout (int sig)
1959 {
1960   assert (sig == SIGALRM);
1961   /* We don't have siglongjmp to preserve the set of blocked signals;
1962      if we longjumped out of the handler at this point, SIGALRM would
1963      remain blocked.  We must unblock it manually. */
1964   sigset_t set;
1965   sigemptyset (&set);
1966   sigaddset (&set, SIGALRM);
1967   sigprocmask (SIG_BLOCK, &set, NULL);
1968
1969   /* Now it's safe to longjump. */
1970   longjmp (run_with_timeout_env, -1);
1971 }
1972 # endif /* not HAVE_SIGSETJMP */
1973
/* Schedule delivery of SIGALRM after TIMEOUT seconds, through
   setitimer where ITIMER_REAL is available and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would round
   down to zero is bumped to the smallest legal interval (1us with
   setitimer, 1s with alarm), which guarantees that SIGALRM is
   delivered in every case.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  long whole_secs = (long) timeout;

  xzero (itv);
  itv.it_value.tv_sec = whole_secs;
  itv.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* An all-zero value would mean "wait forever", so make sure at
     least the minimum interval is requested.  */
  if (!itv.it_value.tv_sec && !itv.it_value.tv_usec)
    itv.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  alarm(0) means "never deliver
     the alarm", i.e. "wait forever", which is not what someone who
     specifies a 0.5s timeout would expect; round such timeouts up to
     one second instead of down to zero.  */
  int secs = (int) timeout;
  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
2008
/* Undo alarm_set: disarm the pending timer so that no SIGALRM is
   delivered for it.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* An all-zero itimerval switches the timer off.  */
  struct itimerval off;
  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2022
2023 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2024    seconds.  Returns true if the function was interrupted with a
2025    timeout, false otherwise.
2026
2027    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2028    using setitimer() or alarm().  The timeout is enforced by
2029    longjumping out of the SIGALRM handler.  This has several
2030    advantages compared to the traditional approach of relying on
2031    signals causing system calls to exit with EINTR:
2032
2033      * The callback function is *forcibly* interrupted after the
2034        timeout expires, (almost) regardless of what it was doing and
2035        whether it was in a syscall.  For example, a calculation that
2036        takes a long time is interrupted as reliably as an IO
2037        operation.
2038
2039      * It works with both SYSV and BSD signals because it doesn't
2040        depend on the default setting of SA_RESTART.
2041
2042      * It doesn't require special handler setup beyond a simple call
2043        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2044        optional.)
2045
2046    The only downside is that, if FUN allocates internal resources that
2047    are normally freed prior to exit from the functions, they will be
2048    lost in case of timeout.  */
2049
2050 bool
2051 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2052 {
2053   int saved_errno;
2054
2055   if (timeout == 0)
2056     {
2057       fun (arg);
2058       return false;
2059     }
2060
2061   signal (SIGALRM, abort_run_with_timeout);
2062   if (SETJMP (run_with_timeout_env) != 0)
2063     {
2064       /* Longjumped out of FUN with a timeout. */
2065       signal (SIGALRM, SIG_DFL);
2066       return true;
2067     }
2068   alarm_set (timeout);
2069   fun (arg);
2070
2071   /* Preserve errno in case alarm() or signal() modifies it. */
2072   saved_errno = errno;
2073   alarm_cancel ();
2074   signal (SIGALRM, SIG_DFL);
2075   errno = saved_errno;
2076
2077   return false;
2078 }
2079
2080 #else  /* not USE_SIGNAL_TIMEOUT */
2081
2082 #ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based
   timeouts: FUN(ARG) is called directly, TIMEOUT is ignored and the
   result is always false.  Windows is excluded because it supplies
   its own thread-based run_with_timeout.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;
  (*fun) (arg);
  return false;
}
2093 #endif /* not WINDOWS */
2094 #endif /* not USE_SIGNAL_TIMEOUT */
2095 \f
2096 #ifndef WINDOWS
2097
/* Suspend execution for SECONDS seconds (fractions allowed).  On
   machines without nanosleep() the sleep may end early when a signal
   arrives.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is preferred: it is accurate and, more importantly,
     reports the unslept time, which lets us resume reliably after a
     signal such as SIGWINCH.  (There was an actual Debian bug report
     about --limit-rate malfunctioning while the terminal was being
     resized.)  */
  struct timespec request, leftover;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);

  for (;;)
    {
      if (nanosleep (&request, &leftover) >= 0 || errno != EINTR)
        break;
      /* Interrupted by a signal: go back to sleep for whatever
         time is left.  */
      request = leftover;
    }
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* Some systems' usleep cannot handle values larger than
         1,000,000, so cover the whole seconds with sleep and leave
         only the subsecond remainder to usleep.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval interval;

  interval.tv_sec = (long) seconds;
  interval.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &interval);
  /* A return of -1 with errno EINTR means a signal interrupted us.
     Without knowing how long we actually slept we cannot go back to
     sleep, and tracking it with gettimeofday is slow and unreliable
     due to clock skew.  */
#endif
}
2143
2144 #endif /* not WINDOWS */
2145
/* Encode the LENGTH octets at DATA to base64 format, storing the
   result to DEST.  The output is zero-terminated, so DEST must point
   to a writable buffer of at least 1+BASE64_LENGTH(length) bytes.
   The function returns the length of the resulting base64 data, not
   counting the terminating zero.  A LENGTH of zero (or less) yields
   the empty string.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  char *p = dest;
  /* Input bytes still to be encoded.  Counting them, rather than
     comparing against an end pointer computed as DATA + LENGTH - 2,
     avoids forming a pointer in front of the buffer when LENGTH is
     less than two.  */
  int left = length;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing bytes, padding with '='...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* Nothing left over.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (int) (p - dest);
}
2202
/* Fetch into C the next character of the string at P that is not
   whitespace, moving P past it.  C becomes \0 once the end of the
   string is reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

/* True when C has its high bit clear.  */
#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode data from BASE64 (a null-terminated string) into the memory
   DEST points to.  DEST is assumed to be large enough to accommodate
   the decoded data, which is guaranteed to be no more than
   3/4*strlen(base64).  Whitespace in the input is skipped.

   DEST receives binary data and is therefore not NUL-terminated.
   The return value is the number of bytes written to DEST, or -1
   when the base64 input is malformed.

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
  /* True for the 64 data characters only.  Both '=' and \0 map to -1
     in the table, so a failed test also covers padding in the wrong
     place and premature end of input.  */
#define IS_BASE64_DIGIT(c) (IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0)

  const char *in = base64;
  char *out = dest;

  for (;;)
    {
      unsigned char ch;
      unsigned long bits;

      /* First character of a quadruplet; a clean end of input is
         only acceptable here.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        break;
      if (!IS_BASE64_DIGIT (ch))
        return -1;              /* illegal char while decoding base64 */
      bits = BASE64_CHAR_TO_VALUE (ch) << 18;

      /* Second character; together with the first it completes one
         output byte.  */
      NEXT_CHAR (ch, in);
      if (!IS_BASE64_DIGIT (ch))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (ch) << 12;
      *out++ = bits >> 16;

      /* Third character; padding here must be followed by a second
         `='.  */
      NEXT_CHAR (ch, in);
      if (ch == '=')
        {
          NEXT_CHAR (ch, in);
          if (ch != '=')
            return -1;          /* premature EOF, or padding `=' expected
                                   but not found */
          continue;
        }
      if (!IS_BASE64_DIGIT (ch))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (ch) << 6;
      *out++ = (bits >> 8) & 0xff;

      /* Fourth character; a single `=' of padding is accepted.  */
      NEXT_CHAR (ch, in);
      if (ch == '=')
        continue;
      if (!IS_BASE64_DIGIT (ch))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (ch);
      *out++ = bits & 0xff;
    }
#undef IS_BASE64_DIGIT
#undef BASE64_CHAR_TO_VALUE

  return out - (char *) dest;
}
2309
2310 #ifdef HAVE_LIBPCRE
2311 /* Compiles the PCRE regex. */
2312 void *
2313 compile_pcre_regex (const char *str)
2314 {
2315   const char *errbuf;
2316   int erroffset;
2317   pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
2318   if (! regex)
2319     {
2320       fprintf (stderr, _("Invalid regular expression %s, %s\n"),
2321                quote (str), errbuf);
2322       return false;
2323     }
2324   return regex;
2325 }
2326 #endif
2327
/* Compile STR as a POSIX extended regular expression (REG_EXTENDED |
   REG_NOSUB).  Returns a newly allocated regex_t, or NULL after
   printing a diagnostic to stderr when STR is not a valid
   expression.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *regex = xmalloc (sizeof *regex);
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* Calling regerror with a zero-sized buffer reports how much
         room the message needs.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* Nothing was compiled, so release the holder instead of
         leaking it.  */
      xfree (regex);
      return NULL;
    }

  return regex;
}
2347
2348 #ifdef HAVE_LIBPCRE
2349 #define OVECCOUNT 30
2350 /* Matches a PCRE regex.  */
2351 bool
2352 match_pcre_regex (const void *regex, const char *str)
2353 {
2354   int l = strlen (str);
2355   int ovector[OVECCOUNT];
2356
2357   int rc = pcre_exec ((pcre *) regex, 0, str, l, 0, 0, ovector, OVECCOUNT);
2358   if (rc == PCRE_ERROR_NOMATCH)
2359     return false;
2360   else if (rc < 0)
2361     {
2362       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2363                  quote (str), rc);
2364       return false;
2365     }
2366   else
2367     return true;
2368 }
2369 #undef OVECCOUNT
2370 #endif
2371
2372 /* Matches a POSIX regex.  */
2373 bool
2374 match_posix_regex (const void *regex, const char *str)
2375 {
2376   int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
2377   if (rc == REG_NOMATCH)
2378     return false;
2379   else if (rc == 0)
2380     return true;
2381   else
2382     {
2383       int errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
2384       char *errbuf = xmalloc (errbuf_size);
2385       regerror (rc, opt.acceptregex, errbuf, errbuf_size);
2386       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2387                  quote (str), rc);
2388       xfree (errbuf);
2389       return false;
2390     }
2391 }
2392
2393 #undef IS_ASCII
2394 #undef NEXT_CHAR
2395 \f
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements FROM..TO (both inclusive) of BASE, each SIZE
   bytes wide, in the order defined by CMPFUN.  TEMP is scratch space
   that must be at least as large as the sorted part of BASE.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way so that the sum of the two indices cannot
         overflow.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      /* Merge the two sorted halves into TEMP.  Taking the left
         element on ties is what keeps the sort stable.  */
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* Copy the merged run back in one go.  */
      memcpy (ELT (base, from), ELT (temp, from), (to - from + 1) * size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage of
   NMEMB * SIZE bytes with alloca, so it is only suitable for arrays
   that comfortably fit on the stack.  Arrays with fewer than two
   elements are left untouched.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* The guard has to be on the element count: with NMEMB == 0 the
     upper index NMEMB - 1 would wrap around, and elements of SIZE 1
     need sorting like any others.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2441 \f
/* Format NUMBER for display, returning a pointer to a static buffer
   that the next call overwrites.  Values of ten and above are rounded
   to an integer.  Smaller ones are shown with a single significant
   digit, no trailing zeros and at most three fractional digits: 0.1
   gives "0.1", 0.035 gives "0.04", 0.0091 gives "0.009" and 0.0003
   gives plain "0".

   This suits durations, conveying the order of magnitude without
   unnecessary clutter -- long-running downloads lose the fractional
   part while short ones keep one significant digit.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  double magnitude = number < 0 ? -number : number;

  /* The cut-off is 9.95 because the %.1f below would render 9.96 as
     "10.0" rather than "10".  OTOH 9.94 still prints as "9.9".  */
  if (magnitude >= 9.95)
    {
      snprintf (buf, sizeof buf, "%.0f", number);
      return buf;
    }

  if (magnitude >= 0.95)
    {
      snprintf (buf, sizeof buf, "%.1f", number);
      return buf;
    }

  if (magnitude >= 0.001)
    {
      snprintf (buf, sizeof buf, "%.1g", number);
      return buf;
    }

  /* round [0.0005, 0.001) to 0.001 */
  if (magnitude >= 0.0005)
    {
      snprintf (buf, sizeof buf, "%.3f", number);
      return buf;
    }

  /* print numbers close to 0 as 0, not 0.000 */
  buf[0] = '0';
  buf[1] = '\0';
  return buf;
}
2476
2477 /* Get the maximum name length for the given path. */
2478 /* Return 0 if length is unknown. */
2479 size_t
2480 get_max_length (const char *path, int length, int name)
2481 {
2482   long ret;
2483   char *p, *d;
2484
2485   /* Make a copy of the path that we can modify. */
2486   p = path ? strdupdelim (path, path + length) : strdup ("");
2487
2488   for (;;)
2489     {
2490       errno = 0;
2491       /* For an empty path query the current directory. */
2492 #if HAVE_PATHCONF
2493       ret = pathconf (*p ? p : ".", name);
2494       if (!(ret < 0 && errno == ENOENT))
2495         break;
2496 #else
2497       ret = PATH_MAX;
2498 #endif
2499
2500       /* The path does not exist yet, but may be created. */
2501       /* Already at current or root directory, give up. */
2502       if (!*p || strcmp (p, "/") == 0)
2503         break;
2504
2505       /* Remove one directory level and try again. */
2506       d = strrchr (p, '/');
2507       if (d == p)
2508         p[1] = '\0';  /* check root directory */
2509       else if (d)
2510         *d = '\0';  /* remove last directory part */
2511       else
2512         *p = '\0';  /* check current directory */
2513     }
2514
2515   xfree (p);
2516
2517   if (ret < 0)
2518     {
2519       /* pathconf() has a message for us. */
2520       if (errno != 0)
2521           perror ("pathconf");
2522
2523       /* If (errno == 0) then there is no max length.
2524          Even on error return 0 so the caller can continue. */
2525       return 0;
2526     }
2527
2528   return ret;
2529 }
2530
2531 #ifdef TESTING
2532
/* Unit test for subdir_p: run it over a table of directory pairs and
   compare against the expected answers.  Returns NULL when every
   case passes; mu_assert reports the first mismatch.  */
const char *
test_subdir_p()
{
  static struct {
    const char *d1;
    const char *d2;
    bool expected;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  unsigned idx = 0;

  while (idx < countof(cases))
    {
      bool got = subdir_p (cases[idx].d1, cases[idx].d2);

      mu_assert ("test_subdir_p: wrong result",
                 got == cases[idx].expected);
      ++idx;
    }

  return NULL;
}
2557
/* Unit test for dir_matches_p: run it over a table of (pattern list,
   directory) pairs and compare against the expected answers.
   Returns NULL when every case passes; mu_assert reports the first
   mismatch.  */
const char *
test_dir_matches_p()
{
  static struct {
    const char *dirlist[3];
    const char *dir;
    bool expected;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  unsigned idx = 0;

  while (idx < countof(cases))
    {
      bool got = dir_matches_p (cases[idx].dirlist, cases[idx].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == cases[idx].expected);
      ++idx;
    }

  return NULL;
}
2594
2595 #endif /* TESTING */
2596