sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #include <errno.h>
  46 #include <fcntl.h>
  47 #include <assert.h>
  48 #include <stdarg.h>
  49 #include <locale.h>
  50
  51 #if HAVE_UTIME
  52 # include <sys/types.h>
  53 # ifdef HAVE_UTIME_H
  54 #  include <utime.h>
  55 # endif
  56
  57 # ifdef HAVE_SYS_UTIME_H
  58 #  include <sys/utime.h>
  59 # endif
  60 #endif
  61
  62 #include <sys/stat.h>
  63
  64 /* For TIOCGWINSZ and friends: */
  65 #ifdef HAVE_SYS_IOCTL_H
  66 # include <sys/ioctl.h>
  67 #endif
  68 #ifdef HAVE_TERMIOS_H
  69 # include <termios.h>
  70 #endif
  71
  72 /* Needed for Unix version of run_with_timeout. */
  73 #include <signal.h>
  74 #include <setjmp.h>
  75
  76 #ifndef HAVE_SIGSETJMP
  77 /* If sigsetjmp is a macro, configure won't pick it up. */
  78 # ifdef sigsetjmp
  79 #  define HAVE_SIGSETJMP
  80 # endif
  81 #endif
  82
  83 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  84 # define USE_SIGNAL_TIMEOUT
  85 #endif
  86
  87 #include "utils.h"
  88 #include "hash.h"
  89
  90 #ifdef __VMS
  91 #include "vms.h"
  92 #endif /* def __VMS */
  93
  94 #ifdef TESTING
  95 #include "test.h"
  96 #endif
  97
  98 static void
  99 memfatal (const char *context, long attempted_size)
 100 {
 101   /* Make sure we don't try to store part of the log line, and thus
 102      call malloc.  */
 103   log_set_save_context (false);
 104
 105   /* We have different log outputs in different situations:
 106      1) output without bytes information
 107      2) output with bytes information  */
 108   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 109     {
 110       logprintf (LOG_ALWAYS,
 111                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 112                  exec_name, context);
 113     }
 114   else
 115     {
 116       logprintf (LOG_ALWAYS,
 117                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 118                  exec_name, context, attempted_size);
 119     }
 120
 121   exit (1);
 122 }
 123
 124 /* Character property table for (re-)escaping VMS ODS5 extended file
 125    names.  Note that this table ignores Unicode.
 126
 127    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 128
 129    ODS5 Invalid characters:
 130       C0 control codes (0x00 to 0x1F inclusive)
 131       Asterisk (*)
 132       Question mark (?)
 133
 134    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 135       Double quotation marks (")
 136       Backslash (\)
 137       Colon (:)
 138       Left angle bracket (<)
 139       Right angle bracket (>)
 140       Slash (/)
 141       Vertical bar (|)
 142
 143    Characters escaped by "^":
 144       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 145        @  [  \  ]  ^  `  {  |  }  ~
 146
 147    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 148    case.  Note that un-escaped < and > can also confuse a directory
 149    spec.
 150
 151    Characters put out as ^xx:
 152       7F (DEL)
 153       80-9F (C1 control characters)
 154       A0 (nonbreaking space)
 155       FF (Latin small letter y diaeresis)
 156
 157    Other cases:
 158       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 159
 160     Property table values:
 161       Normal escape:    1
 162       Space:            2
 163       Dot:              4
 164       Hex-hex escape:   8
 165       ODS2 normal:     16
 166       ODS2 lower case: 32
 167       Hex digit:       64
 168 */
 169
 170 unsigned char char_prop[ 256] = {
 171
 172 /* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
 173     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 174
 175 /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
 176     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 177
 178 /*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
 179     2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,
 180
 181 /*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
 182    80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,
 183
 184 /*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
 185     1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,
 186
 187 /*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
 188    16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,
 189
 190 /*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
 191     1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,
 192
 193 /*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
 194    32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,
 195
 196     8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
 197     8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
 198     8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 199     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 200     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 201     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 202     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
 203     0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
 204 };
 205
 206 /* Utility function: like xstrdup(), but also lowercases S.  */
 207
 208 char *
 209 xstrdup_lower (const char *s)
 210 {
 211   char *copy = xstrdup (s);
 212   char *p = copy;
 213   for (; *p; p++)
 214     *p = c_tolower (*p);
 215   return copy;
 216 }
 217
 218 /* Copy the string formed by two pointers (one on the beginning, other
 219    on the char after the last char) to a new, malloc-ed location.
 220    0-terminate it.  */
 221 char *
 222 strdupdelim (const char *beg, const char *end)
 223 {
 224   char *res = xmalloc (end - beg + 1);
 225   memcpy (res, beg, end - beg);
 226   res[end - beg] = '\0';
 227   return res;
 228 }
 229
 230 /* Parse a string containing comma-separated elements, and return a
 231    vector of char pointers with the elements.  Spaces following the
 232    commas are ignored.  */
 233 char **
 234 sepstring (const char *s)
 235 {
 236   char **res;
 237   const char *p;
 238   int i = 0;
 239
 240   if (!s || !*s)
 241     return NULL;
 242   res = NULL;
 243   p = s;
 244   while (*s)
 245     {
 246       if (*s == ',')
 247         {
 248           res = xrealloc (res, (i + 2) * sizeof (char *));
 249           res[i] = strdupdelim (p, s);
 250           res[++i] = NULL;
 251           ++s;
 252           /* Skip the blanks following the ','.  */
 253           while (c_isspace (*s))
 254             ++s;
 255           p = s;
 256         }
 257       else
 258         ++s;
 259     }
 260   res = xrealloc (res, (i + 2) * sizeof (char *));
 261   res[i] = strdupdelim (p, s);
 262   res[i + 1] = NULL;
 263   return res;
 264 }
 265 \f
 266 /* Like sprintf, but prints into a string of sufficient size freshly
 267    allocated with malloc, which is returned.  If unable to print due
 268    to invalid format, returns NULL.  Inability to allocate needed
 269    memory results in abort, as with xmalloc.  This is in spirit
 270    similar to the GNU/BSD extension asprintf, but somewhat easier to
 271    use.
 272
 273    Internally the function either calls vasprintf or loops around
 274    vsnprintf until the correct size is found.  Since Wget also ships a
 275    fallback implementation of vsnprintf, this should be portable.  */
 276
/* Upper limit on the size of the text buffer allocated by aprintf.
   It applies only when vasprintf is not available on the system and
   vsnprintf returns -1 when a long line is truncated (as in old
   versions of glibc and on other systems that lack C99 support).  */
 281
 282 #define FMT_MAX_LENGTH 1048576
 283
 284 char *
 285 aprintf (const char *fmt, ...)
 286 {
 287 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 288   /* Use vasprintf. */
 289   int ret;
 290   va_list args;
 291   char *str;
 292   va_start (args, fmt);
 293   ret = vasprintf (&str, fmt, args);
 294   va_end (args);
 295   if (ret < 0 && errno == ENOMEM)
 296     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 297                                                       with xmalloc/xrealloc */
 298   else if (ret < 0)
 299     return NULL;
 300   return str;
 301 #else  /* not HAVE_VASPRINTF */
 302
 303   /* vasprintf is unavailable.  snprintf into a small buffer and
 304      resize it as necessary. */
 305   int size = 32;
 306   char *str = xmalloc (size);
 307
 308   /* #### This code will infloop and eventually abort in xrealloc if
 309      passed a FMT that causes snprintf to consistently return -1.  */
 310
 311   while (1)
 312     {
 313       int n;
 314       va_list args;
 315
 316       va_start (args, fmt);
 317       n = vsnprintf (str, size, fmt, args);
 318       va_end (args);
 319
 320       /* If the printing worked, return the string. */
 321       if (n > -1 && n < size)
 322         return str;
 323
 324       /* Else try again with a larger buffer. */
 325       if (n > -1)               /* C99 */
 326         size = n + 1;           /* precisely what is needed */
 327       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 328         {                               /* maybe we have some wrong
 329                                            format string? */
 330           logprintf (LOG_ALWAYS,
 331                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 332                        "aborting.\n"),
 333                      exec_name, size);  /* printout a log message */
 334           abort ();                     /* and abort... */
 335         }
 336       else
 337         {
 338           /* else, we continue to grow our
 339            * buffer: Twice the old size. */
 340           size <<= 1;
 341         }
 342       str = xrealloc (str, size);
 343     }
 344 #endif /* not HAVE_VASPRINTF */
 345 }
 346
 347 /* Concatenate the NULL-terminated list of string arguments into
 348    freshly allocated space.  */
 349
 350 char *
 351 concat_strings (const char *str0, ...)
 352 {
 353   va_list args;
 354   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 355   char *ret, *p;
 356
 357   const char *next_str;
 358   int total_length = 0;
 359   size_t argcount;
 360
 361   /* Calculate the length of and allocate the resulting string. */
 362
 363   argcount = 0;
 364   va_start (args, str0);
 365   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 366     {
 367       int len = strlen (next_str);
 368       if (argcount < countof (saved_lengths))
 369         saved_lengths[argcount++] = len;
 370       total_length += len;
 371     }
 372   va_end (args);
 373   p = ret = xmalloc (total_length + 1);
 374
 375   /* Copy the strings into the allocated space. */
 376
 377   argcount = 0;
 378   va_start (args, str0);
 379   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 380     {
 381       int len;
 382       if (argcount < countof (saved_lengths))
 383         len = saved_lengths[argcount++];
 384       else
 385         len = strlen (next_str);
 386       memcpy (p, next_str, len);
 387       p += len;
 388     }
 389   va_end (args);
 390   *p = '\0';
 391
 392   return ret;
 393 }
 394 \f
 395 /* Format the provided time according to the specified format.  The
 396    format is a string with format elements supported by strftime.  */
 397
 398 static char *
 399 fmttime (time_t t, const char *fmt)
 400 {
 401   static char output[32];
 402   struct tm *tm = localtime(&t);
 403   if (!tm)
 404     abort ();
 405   if (!strftime(output, sizeof(output), fmt, tm))
 406     abort ();
 407   return output;
 408 }
 409
 410 /* Return pointer to a static char[] buffer in which zero-terminated
 411    string-representation of TM (in form hh:mm:ss) is printed.
 412
 413    If TM is NULL, the current time will be used.  */
 414
 415 char *
 416 time_str (time_t t)
 417 {
 418   return fmttime(t, "%H:%M:%S");
 419 }
 420
 421 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 422
 423 char *
 424 datetime_str (time_t t)
 425 {
 426   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 427 }
 428 \f
 429 /* The Windows versions of the following two functions are defined in
 430    mswindows.c. On MSDOS this function should never be called. */
 431
 432 #ifdef __VMS
 433
 434 void
 435 fork_to_background (void)
 436 {
 437   return;
 438 }
 439
 440 #else /* def __VMS */
 441
 442 #if !defined(WINDOWS) && !defined(MSDOS)
 443 void
 444 fork_to_background (void)
 445 {
 446   pid_t pid;
 447   /* Whether we arrange our own version of opt.lfilename here.  */
 448   bool logfile_changed = false;
 449
 450   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 451     {
 452       /* We must create the file immediately to avoid either a race
 453          condition (which arises from using unique_name and failing to
 454          use fopen_excl) or lying to the user about the log file name
 455          (which arises from using unique_name, printing the name, and
 456          using fopen_excl later on.)  */
 457       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 458       if (new_log_fp)
 459         {
 460           logfile_changed = true;
 461           fclose (new_log_fp);
 462         }
 463     }
 464   pid = fork ();
 465   if (pid < 0)
 466     {
 467       /* parent, error */
 468       perror ("fork");
 469       exit (1);
 470     }
 471   else if (pid != 0)
 472     {
 473       /* parent, no error */
 474       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 475       if (logfile_changed)
 476         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 477       exit (0);                 /* #### should we use _exit()? */
 478     }
 479
 480   /* child: give up the privileges and keep running. */
 481   setsid ();
 482   freopen ("/dev/null", "r", stdin);
 483   freopen ("/dev/null", "w", stdout);
 484   freopen ("/dev/null", "w", stderr);
 485 }
 486 #endif /* !WINDOWS && !MSDOS */
 487
 488 #endif /* def __VMS [else] */
 489
 490 \f
 491 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 492    specified with TM.  The atime ("access time") is set to the current
 493    time.  */
 494
 495 void
 496 touch (const char *file, time_t tm)
 497 {
 498 #if HAVE_UTIME
 499 # ifdef HAVE_STRUCT_UTIMBUF
 500   struct utimbuf times;
 501 # else
 502   struct {
 503     time_t actime;
 504     time_t modtime;
 505   } times;
 506 # endif
 507   times.modtime = tm;
 508   times.actime = time (NULL);
 509   if (utime (file, &times) == -1)
 510     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 511 #else
 512   struct timespec timespecs[2];
 513   int fd;
 514
 515   fd = open (file, O_WRONLY);
 516   if (fd < 0)
 517     {
 518       logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
 519       return;
 520     }
 521
 522   timespecs[0].tv_sec = time (NULL);
 523   timespecs[0].tv_nsec = 0L;
 524   timespecs[1].tv_sec = tm;
 525   timespecs[1].tv_nsec = 0L;
 526
 527   if (futimens (fd, timespecs) == -1)
 528     logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
 529
 530   close (fd);
 531 #endif
 532 }
 533
 534 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 535    nothing under MS-Windows.  */
 536 int
 537 remove_link (const char *file)
 538 {
 539   int err = 0;
 540   struct_stat st;
 541
 542   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 543     {
 544       DEBUGP (("Unlinking %s (symlink).\n", file));
 545       err = unlink (file);
 546       if (err != 0)
 547         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 548                    quote (file), strerror (errno));
 549     }
 550   return err;
 551 }
 552
 553 /* Does FILENAME exist?  This is quite a lousy implementation, since
 554    it supplies no error codes -- only a yes-or-no answer.  Thus it
 555    will return that a file does not exist if, e.g., the directory is
 556    unreadable.  I don't mind it too much currently, though.  The
 557    proper way should, of course, be to have a third, error state,
 558    other than true/false, but that would introduce uncalled-for
 559    additional complexity to the callers.  */
 560 bool
 561 file_exists_p (const char *filename)
 562 {
 563 #ifdef HAVE_ACCESS
 564   return access (filename, F_OK) >= 0;
 565 #else
 566   struct_stat buf;
 567   return stat (filename, &buf) >= 0;
 568 #endif
 569 }
 570
 571 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 572    Returns 0 on error.  */
 573 bool
 574 file_non_directory_p (const char *path)
 575 {
 576   struct_stat buf;
 577   /* Use lstat() rather than stat() so that symbolic links pointing to
 578      directories can be identified correctly.  */
 579   if (lstat (path, &buf) != 0)
 580     return false;
 581   return S_ISDIR (buf.st_mode) ? false : true;
 582 }
 583
 584 /* Return the size of file named by FILENAME, or -1 if it cannot be
 585    opened or seeked into. */
 586 wgint
 587 file_size (const char *filename)
 588 {
 589 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 590   wgint size;
 591   /* We use fseek rather than stat to determine the file size because
 592      that way we can also verify that the file is readable without
 593      explicitly checking for permissions.  Inspired by the POST patch
 594      by Arnaud Wylie.  */
 595   FILE *fp = fopen (filename, "rb");
 596   if (!fp)
 597     return -1;
 598   fseeko (fp, 0, SEEK_END);
 599   size = ftello (fp);
 600   fclose (fp);
 601   return size;
 602 #else
 603   struct_stat st;
 604   if (stat (filename, &st) < 0)
 605     return -1;
 606   return st.st_size;
 607 #endif
 608 }
 609
 610 /* 2005-02-19 SMS.
 611    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 612    original name.  With the VMS file systems' versioning, everything
 613    should be fine, and appending ".NN" just causes trouble.
 614 */
 615
 616 #ifdef UNIQ_SEP
 617
 618 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 619    doesn't exist is found.  Return a freshly allocated copy of the
 620    unused file name.  */
 621
 622 static char *
 623 unique_name_1 (const char *prefix)
 624 {
 625   int count = 1;
 626   int plen = strlen (prefix);
 627   char *template = (char *)alloca (plen + 1 + 24);
 628   char *template_tail = template + plen;
 629
 630   memcpy (template, prefix, plen);
 631   *template_tail++ = UNIQ_SEP;
 632
 633   do
 634     number_to_string (template_tail, count++);
 635   while (file_exists_p (template));
 636
 637   return xstrdup (template);
 638 }
 639
 640 /* Return a unique file name, based on FILE.
 641
 642    More precisely, if FILE doesn't exist, it is returned unmodified.
 643    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 644    file name that doesn't exist is returned.
 645
 646    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 647
 648    The resulting file is not created, only verified that it didn't
 649    exist at the point in time when the function was called.
 650    Therefore, where security matters, don't rely that the file created
 651    by this function exists until you open it with O_EXCL or
 652    equivalent.
 653
 654    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 655    string.  Otherwise, it may return FILE if the file doesn't exist
 656    (and therefore doesn't need changing).  */
 657
 658 char *
 659 unique_name (const char *file, bool allow_passthrough)
 660 {
 661   /* If the FILE itself doesn't exist, return it without
 662      modification. */
 663   if (!file_exists_p (file))
 664     return allow_passthrough ? (char *)file : xstrdup (file);
 665
 666   /* Otherwise, find a numeric suffix that results in unused file name
 667      and return it.  */
 668   return unique_name_1 (file);
 669 }
 670
 671 #else /* def UNIQ_SEP */
 672
 673 /* Dummy unique_name() for VMS.  Return the original name as easily as
 674    possible.
 675 */
 676 char *
 677 unique_name (const char *file, bool allow_passthrough)
 678 {
 679   /* Return the FILE itself, without modification, irregardful. */
 680   return allow_passthrough ? (char *)file : xstrdup (file);
 681 }
 682
 683 #endif /* def UNIQ_SEP [else] */
 684
 685 /* Create a file based on NAME, except without overwriting an existing
 686    file with that name.  Providing O_EXCL is correctly implemented,
 687    this function does not have the race condition associated with
 688    opening the file returned by unique_name.  */
 689
 690 FILE *
 691 unique_create (const char *name, bool binary, char **opened_name)
 692 {
 693   /* unique file name, based on NAME */
 694   char *uname = unique_name (name, false);
 695   FILE *fp;
 696   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 697     {
 698       xfree (uname);
 699       uname = unique_name (name, false);
 700     }
 701   if (opened_name && fp != NULL)
 702     {
 703       if (fp)
 704         *opened_name = uname;
 705       else
 706         {
 707           *opened_name = NULL;
 708           xfree (uname);
 709         }
 710     }
 711   else
 712     xfree (uname);
 713   return fp;
 714 }
 715
 716 /* Open the file for writing, with the addition that the file is
 717    opened "exclusively".  This means that, if the file already exists,
 718    this function will *fail* and errno will be set to EEXIST.  If
 719    BINARY is set, the file will be opened in binary mode, equivalent
 720    to fopen's "wb".
 721
 722    If opening the file fails for any reason, including the file having
 723    previously existed, this function returns NULL and sets errno
 724    appropriately.  */
 725
 726 FILE *
 727 fopen_excl (const char *fname, int binary)
 728 {
 729   int fd;
 730 #ifdef O_EXCL
 731
 732 /* 2005-04-14 SMS.
 733    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 734    It also has file versions which obviate all the O_EXCL effort.
 735    O_TRUNC (something of a misnomer) requests a new version.
 736 */
 737 # ifdef __VMS
 738 /* Common open() optional arguments:
 739    sequential access only, access callback function.
 740 */
 741 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 742
 743   int open_id;
 744   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 745
 746   if (binary > 1)
 747     {
 748       open_id = 11;
 749       fd = open( fname,                 /* File name. */
 750        flags,                           /* Flags. */
 751        0777,                            /* Mode for default protection. */
 752        "ctx=bin,stm",                   /* Binary, stream access. */
 753        "rfm=stmlf",                     /* Stream_LF. */
 754        OPEN_OPT_ARGS);                  /* Access callback. */
 755     }
 756   else if (binary)
 757     {
 758       open_id = 12;
 759       fd = open( fname,                 /* File name. */
 760        flags,                           /* Flags. */
 761        0777,                            /* Mode for default protection. */
 762        "ctx=bin,stm",                   /* Binary, stream access. */
 763        "rfm=fix",                       /* Fixed-length, */
 764        "mrs=512",                       /* 512-byte records. */
 765        OPEN_OPT_ARGS);                  /* Access callback. */
 766     }
 767   else
 768     {
 769       open_id = 13;
 770       fd = open( fname,                 /* File name. */
 771        flags,                           /* Flags. */
 772        0777,                            /* Mode for default protection.
 773 */
 774        "rfm=stmlf",                     /* Stream_LF. */
 775        OPEN_OPT_ARGS);                  /* Access callback. */
 776     }
 777 # else /* def __VMS */
 778   int flags = O_WRONLY | O_CREAT | O_EXCL;
 779 # ifdef O_BINARY
 780   if (binary)
 781     flags |= O_BINARY;
 782 # endif
 783   fd = open (fname, flags, 0666);
 784 # endif /* def __VMS [else] */
 785
 786   if (fd < 0)
 787     return NULL;
 788   return fdopen (fd, binary ? "wb" : "w");
 789 #else  /* not O_EXCL */
 790   /* Manually check whether the file exists.  This is prone to race
 791      conditions, but systems without O_EXCL haven't deserved
 792      better.  */
 793   if (file_exists_p (fname))
 794     {
 795       errno = EEXIST;
 796       return NULL;
 797     }
 798   return fopen (fname, binary ? "wb" : "w");
 799 #endif /* not O_EXCL */
 800 }
 801 \f
 802 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 803    are missing, create them first.  In case any mkdir() call fails,
 804    return its error status.  Returns 0 on successful completion.
 805
 806    The behaviour of this function should be identical to the behaviour
 807    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 808 int
 809 make_directory (const char *directory)
 810 {
 811   int i, ret, quit = 0;
 812   char *dir;
 813
 814   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 815      function is unsafe if called with a read-only char *argument.  */
 816   STRDUP_ALLOCA (dir, directory);
 817
 818   /* If the first character of dir is '/', skip it (and thus enable
 819      creation of absolute-pathname directories.  */
 820   for (i = (*dir == '/'); 1; ++i)
 821     {
 822       for (; dir[i] && dir[i] != '/'; i++)
 823         ;
 824       if (!dir[i])
 825         quit = 1;
 826       dir[i] = '\0';
 827       /* Check whether the directory already exists.  Allow creation of
 828          of intermediate directories to fail, as the initial path components
 829          are not necessarily directories!  */
 830       if (!file_exists_p (dir))
 831         ret = mkdir (dir, 0777);
 832       else
 833         ret = 0;
 834       if (quit)
 835         break;
 836       else
 837         dir[i] = '/';
 838     }
 839   return ret;
 840 }
 841
 842 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 843    should be a file name.
 844
 845    file_merge("/foo/bar", "baz")  => "/foo/baz"
 846    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 847    file_merge("foo", "bar")       => "bar"
 848
 849    In other words, it's a simpler and gentler version of uri_merge.  */
 850
 851 char *
 852 file_merge (const char *base, const char *file)
 853 {
 854   char *result;
 855   const char *cut = (const char *)strrchr (base, '/');
 856
 857   if (!cut)
 858     return xstrdup (file);
 859
 860   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 861   memcpy (result, base, cut - base);
 862   result[cut - base] = '/';
 863   strcpy (result + (cut - base) + 1, file);
 864
 865   return result;
 866 }
 867 \f
 868 /* Like fnmatch, but performs a case-insensitive match.  */
 869
 870 int
 871 fnmatch_nocase (const char *pattern, const char *string, int flags)
 872 {
 873 #ifdef FNM_CASEFOLD
 874   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 875      also present on *BSD platforms, and possibly elsewhere.  */
 876   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 877 #else
 878   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 879   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 880   char *strcopy = (char *) alloca (strlen (string) + 1);
 881   char *p;
 882   for (p = patcopy; *pattern; pattern++, p++)
 883     *p = c_tolower (*pattern);
 884   *p = '\0';
 885   for (p = strcopy; *string; string++, p++)
 886     *p = c_tolower (*string);
 887   *p = '\0';
 888   return fnmatch (patcopy, strcopy, flags);
 889 #endif
 890 }
 891
 892 static bool in_acclist (const char *const *, const char *, bool);
 893
 894 /* Determine whether a file is acceptable to be followed, according to
 895    lists of patterns to accept/reject.  */
 896 bool
 897 acceptable (const char *s)
 898 {
 899   int l = strlen (s);
 900
 901   if (opt.output_document && strcmp (s, opt.output_document) == 0)
 902     return true;
 903
 904   while (l && s[l] != '/')
 905     --l;
 906   if (s[l] == '/')
 907     s += (l + 1);
 908   if (opt.accepts)
 909     {
 910       if (opt.rejects)
 911         return (in_acclist ((const char *const *)opt.accepts, s, true)
 912                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 913       else
 914         return in_acclist ((const char *const *)opt.accepts, s, true);
 915     }
 916   else if (opt.rejects)
 917     return !in_acclist ((const char *const *)opt.rejects, s, true);
 918   return true;
 919 }
 920
 921 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 922    will return true if and only if D2 begins with `/something/' or is exactly
 923    '/something'.  */
 924 bool
 925 subdir_p (const char *d1, const char *d2)
 926 {
 927   if (*d1 == '\0')
 928     return true;
 929   if (!opt.ignore_case)
 930     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 931       ;
 932   else
 933     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 934       ;
 935
 936   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 937 }
 938
 939 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 940    first element that matches DIR, through wildcards or front comparison (as
 941    appropriate).  */
 942 static bool
 943 dir_matches_p (char **dirlist, const char *dir)
 944 {
 945   char **x;
 946   int (*matcher) (const char *, const char *, int)
 947     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 948
 949   for (x = dirlist; *x; x++)
 950     {
 951       /* Remove leading '/' */
 952       char *p = *x + (**x == '/');
 953       if (has_wildcards_p (p))
 954         {
 955           if (matcher (p, dir, FNM_PATHNAME) == 0)
 956             break;
 957         }
 958       else
 959         {
 960           if (subdir_p (p, dir))
 961             break;
 962         }
 963     }
 964
 965   return *x ? true : false;
 966 }
 967
 968 /* Returns whether DIRECTORY is acceptable for download, wrt the
 969    include/exclude lists.
 970
 971    The leading `/' is ignored in paths; relative and absolute paths
 972    may be freely intermixed.  */
 973
 974 bool
 975 accdir (const char *directory)
 976 {
 977   /* Remove starting '/'.  */
 978   if (*directory == '/')
 979     ++directory;
 980   if (opt.includes)
 981     {
 982       if (!dir_matches_p (opt.includes, directory))
 983         return false;
 984     }
 985   if (opt.excludes)
 986     {
 987       if (dir_matches_p (opt.excludes, directory))
 988         return false;
 989     }
 990   return true;
 991 }
 992
 993 /* Return true if STRING ends with TAIL.  For instance:
 994
 995    match_tail ("abc", "bc", false)  -> 1
 996    match_tail ("abc", "ab", false)  -> 0
 997    match_tail ("abc", "abc", false) -> 1
 998
 999    If FOLD_CASE is true, the comparison will be case-insensitive.  */
1000
1001 bool
1002 match_tail (const char *string, const char *tail, bool fold_case)
1003 {
1004   int i, j;
1005
1006   /* We want this to be fast, so we code two loops, one with
1007      case-folding, one without. */
1008
1009   if (!fold_case)
1010     {
1011       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
1012         if (string[i] != tail[j])
1013           break;
1014     }
1015   else
1016     {
1017       for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
1018         if (c_tolower (string[i]) != c_tolower (tail[j]))
1019           break;
1020     }
1021
1022   /* If the tail was exhausted, the match was succesful.  */
1023   if (j == -1)
1024     return true;
1025   else
1026     return false;
1027 }
1028
1029 /* Checks whether string S matches each element of ACCEPTS.  A list
1030    element are matched either with fnmatch() or match_tail(),
1031    according to whether the element contains wildcards or not.
1032
1033    If the BACKWARD is false, don't do backward comparison -- just compare
1034    them normally.  */
1035 static bool
1036 in_acclist (const char *const *accepts, const char *s, bool backward)
1037 {
1038   for (; *accepts; accepts++)
1039     {
1040       if (has_wildcards_p (*accepts))
1041         {
1042           int res = opt.ignore_case
1043             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1044           /* fnmatch returns 0 if the pattern *does* match the string.  */
1045           if (res == 0)
1046             return true;
1047         }
1048       else
1049         {
1050           if (backward)
1051             {
1052               if (match_tail (s, *accepts, opt.ignore_case))
1053                 return true;
1054             }
1055           else
1056             {
1057               int cmp = opt.ignore_case
1058                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1059               if (cmp == 0)
1060                 return true;
1061             }
1062         }
1063     }
1064   return false;
1065 }
1066
/* Return a pointer to STR's suffix (file extension), i.e. to the
   character following the last `.' in the last path component, or
   NULL if that component contains no `.'.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL

   The result points into STR; nothing is allocated.  */
char *
suffix (const char *str)
{
  const char *dot = strrchr (str, '.');
  const char *slash = strrchr (str, '/');

  /* Either there is no dot at all, or the last dot belongs to a
     directory name rather than to the final component.  */
  if (dot == NULL || (slash != NULL && slash > dot))
    return NULL;

  return (char *) dot + 1;
}
1085
/* Return true if S contains at least one globbing wildcard
   character: `*', `?', `[' or `]'.  */

bool
has_wildcards_p (const char *s)
{
  return strpbrk (s, "*?[]") != NULL;
}
1097
/* Return true if FNAME's suffix, as found by suffix(), looks like an
   HTML suffix.  The comparison is case-insensitive and accepts:

     html
     htm
     ?html (`?' stands for any one character)

   #### CAVEAT.  A matching suffix does not guarantee that FNAME
   actually contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return (strcasecmp (ext, "html") == 0
          || strcasecmp (ext, "htm") == 0
          /* "?html": skip one character, then expect "html".  */
          || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}
1123
/* Read one line from FP into freshly allocated storage and return
   it.  The storage comes from xmalloc()/xrealloc(); the caller owns
   it and must release it when it is no longer needed.

   The line length is limited only by available memory.  The trailing
   newline, if one was read, is kept, and the result is
   zero-terminated.

   NULL is returned when nothing could be read before end-of-file,
   and also when ferror (FP) reports an error.  Use ferror() to tell
   the two cases apart.  */

char *
read_whole_line (FILE *fp)
{
  int capacity = 82;            /* bytes allocated for BUF */
  int used = 0;                 /* bytes of BUF filled so far, not
                                   counting the terminating \0 */
  char *buf = xmalloc (capacity);

  for (;;)
    {
      if (fgets (buf + used, capacity - used, fp) == NULL)
        break;

      used += strlen (buf + used);
      if (used > 0)
        {
          if (buf[used - 1] == '\n')
            break;

          /* No newline yet.  fgets() either reads the whole line or
             fills the space it was given, so grow the buffer and
             keep reading.  */
          capacity *= 2;
          buf = xrealloc (buf, capacity);
        }
      /* USED can still be 0 here, e.g. when reading a binary file in
         which a line begins with \0; in that case just read again.  */
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buf);
      return NULL;
    }

  /* Give back what the doubling over-allocated.  USED + 1 accounts
     for the terminating \0, which fgets() has already stored.  */
  if (used + 1 < capacity)
    buf = xrealloc (buf, used + 1);
  return buf;
}
1173 \f
1174 /* Read FILE into memory.  A pointer to `struct file_memory' are
1175    returned; use struct element `content' to access file contents, and
1176    the element `length' to know the file length.  `content' is *not*
1177    zero-terminated, and you should *not* read or write beyond the [0,
1178    length) range of characters.
1179
1180    After you are done with the file contents, call wget_read_file_free to
1181    release the memory.
1182
1183    Depending on the operating system and the type of file that is
1184    being read, wget_read_file() either mmap's the file into memory, or
1185    reads the file into the core using read().
1186
1187    If file is named "-", fileno(stdin) is used for reading instead.
1188    If you want to read from a real file named "-", use "./-" instead.  */
1189
1190 struct file_memory *
1191 wget_read_file (const char *file)
1192 {
1193   int fd;
1194   struct file_memory *fm;
1195   long size;
1196   bool inhibit_close = false;
1197
1198   /* Some magic in the finest tradition of Perl and its kin: if FILE
1199      is "-", just use stdin.  */
1200   if (HYPHENP (file))
1201     {
1202       fd = fileno (stdin);
1203       inhibit_close = true;
1204       /* Note that we don't inhibit mmap() in this case.  If stdin is
1205          redirected from a regular file, mmap() will still work.  */
1206     }
1207   else
1208     fd = open (file, O_RDONLY);
1209   if (fd < 0)
1210     return NULL;
1211   fm = xnew (struct file_memory);
1212
1213 #ifdef HAVE_MMAP
1214   {
1215     struct_fstat buf;
1216     if (fstat (fd, &buf) < 0)
1217       goto mmap_lose;
1218     fm->length = buf.st_size;
1219     /* NOTE: As far as I know, the callers of this function never
1220        modify the file text.  Relying on this would enable us to
1221        specify PROT_READ and MAP_SHARED for a marginal gain in
1222        efficiency, but at some cost to generality.  */
1223     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1224                         MAP_PRIVATE, fd, 0);
1225     if (fm->content == (char *)MAP_FAILED)
1226       goto mmap_lose;
1227     if (!inhibit_close)
1228       close (fd);
1229
1230     fm->mmap_p = 1;
1231     return fm;
1232   }
1233
1234  mmap_lose:
1235   /* The most common reason why mmap() fails is that FD does not point
1236      to a plain file.  However, it's also possible that mmap() doesn't
1237      work for a particular type of file.  Therefore, whenever mmap()
1238      fails, we just fall back to the regular method.  */
1239 #endif /* HAVE_MMAP */
1240
1241   fm->length = 0;
1242   size = 512;                   /* number of bytes fm->contents can
1243                                    hold at any given time. */
1244   fm->content = xmalloc (size);
1245   while (1)
1246     {
1247       wgint nread;
1248       if (fm->length > size / 2)
1249         {
1250           /* #### I'm not sure whether the whole exponential-growth
1251              thing makes sense with kernel read.  On Linux at least,
1252              read() refuses to read more than 4K from a file at a
1253              single chunk anyway.  But other Unixes might optimize it
1254              better, and it doesn't *hurt* anything, so I'm leaving
1255              it.  */
1256
1257           /* Normally, we grow SIZE exponentially to make the number
1258              of calls to read() and realloc() logarithmic in relation
1259              to file size.  However, read() can read an amount of data
1260              smaller than requested, and it would be unreasonable to
1261              double SIZE every time *something* was read.  Therefore,
1262              we double SIZE only when the length exceeds half of the
1263              entire allocated size.  */
1264           size <<= 1;
1265           fm->content = xrealloc (fm->content, size);
1266         }
1267       nread = read (fd, fm->content + fm->length, size - fm->length);
1268       if (nread > 0)
1269         /* Successful read. */
1270         fm->length += nread;
1271       else if (nread < 0)
1272         /* Error. */
1273         goto lose;
1274       else
1275         /* EOF */
1276         break;
1277     }
1278   if (!inhibit_close)
1279     close (fd);
1280   if (size > fm->length && fm->length != 0)
1281     /* Due to exponential growth of fm->content, the allocated region
1282        might be much larger than what is actually needed.  */
1283     fm->content = xrealloc (fm->content, fm->length);
1284   fm->mmap_p = 0;
1285   return fm;
1286
1287  lose:
1288   if (!inhibit_close)
1289     close (fd);
1290   xfree (fm->content);
1291   xfree (fm);
1292   return NULL;
1293 }
1294
1295 /* Release the resources held by FM.  Specifically, this calls
1296    munmap() or xfree() on fm->content, depending whether mmap or
1297    malloc/read were used to read in the file.  It also frees the
1298    memory needed to hold the FM structure itself.  */
1299
1300 void
1301 wget_read_file_free (struct file_memory *fm)
1302 {
1303 #ifdef HAVE_MMAP
1304   if (fm->mmap_p)
1305     {
1306       munmap (fm->content, fm->length);
1307     }
1308   else
1309 #endif
1310     {
1311       xfree (fm->content);
1312     }
1313   xfree (fm);
1314 }
1315 \f
/* Free every string in the NULL-terminated vector VEC, then free VEC
   itself.  A NULL VEC is allowed and does nothing.  */
void
free_vec (char **vec)
{
  char **elt;

  if (vec == NULL)
    return;

  for (elt = vec; *elt != NULL; elt++)
    xfree (*elt);
  xfree (vec);
}
1329
/* Append the elements of the NULL-terminated vector V2 to the
   NULL-terminated vector V1 and return the merged vector.

   V1 is reallocated and V2 itself (but not the strings it points to,
   which now belong to the result) is freed, so neither pointer may
   be used after the call; use the return value instead.  If V1 is
   NULL, V2 is returned unchanged; if V2 is NULL, V1 is returned
   unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  size_t n1, n2;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 holds no elements: nothing to copy, just dispose of it.  */
      xfree (v2);
      return v1;
    }

  /* Count the elements of each vector, not including the NULL.  */
  for (n1 = 0; v1[n1]; n1++)
    ;
  for (n2 = 0; v2[n2]; n2++)
    ;

  /* Make room for both sets of elements plus the terminating NULL.
     The allocation is sized by the element type (char *), not by the
     type of the vector itself.  */
  v1 = xrealloc (v1, (n1 + n2 + 1) * sizeof *v1);
  /* Copy V2's elements together with its terminating NULL.  */
  memcpy (v1 + n1, v2, (n2 + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1360
/* Append a copy of STR, made with xstrdup(), to the NULL-terminated
   vector VEC.  VEC may be NULL, in which case a new vector is
   allocated.  VEC is reallocated, so the caller must continue with
   the returned pointer.  */

char **
vec_append (char **vec, const char *str)
{
  int n = 0;                    /* number of elements already in VEC */

  if (vec != NULL)
    while (vec[n] != NULL)
      n++;

  /* Room for the old elements, the new one, and the NULL.  */
  vec = xrealloc (vec, (n + 2) * sizeof (char *));
  vec[n] = xstrdup (str);
  vec[n + 1] = NULL;
  return vec;
}
1384 \f
/* String "sets" are hash tables in which strings are stored as keys
   and only their presence is ever queried.  The routines below make
   using a hash table that way transparent.  */

/* Add a copy of S to the string set HT, unless HT already contains
   S.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Checking first means an existing key never has to be freed and
     replaced by a fresh copy.

     Every key is mapped to the same literal "1".  The value is
     arbitrary and costs no memory, because all pairs share the one
     string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1405
/* Tell whether the string set HT contains S.  This simply forwards
   to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  const int present = hash_table_contains (ht, s);
  return present;
}
1413
1414 /* Convert the specified string set to array.  ARRAY should be large
1415    enough to hold hash_table_count(ht) char pointers.  */
1416
1417 void string_set_to_array (struct hash_table *ht, char **array)
1418 {
1419   hash_table_iterator iter;
1420   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1421     *array++ = iter.key;
1422 }
1423
1424 /* Free the string set.  This frees both the storage allocated for
1425    keys and the actual hash table.  (hash_table_destroy would only
1426    destroy the hash table.)  */
1427
1428 void
1429 string_set_free (struct hash_table *ht)
1430 {
1431   hash_table_iterator iter;
1432   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1433     xfree (iter.key);
1434   hash_table_destroy (ht);
1435 }
1436
1437 /* Utility function: simply call xfree() on all keys and values of HT.  */
1438
1439 void
1440 free_keys_and_values (struct hash_table *ht)
1441 {
1442   hash_table_iterator iter;
1443   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1444     {
1445       xfree (iter.key);
1446       xfree (iter.value);
1447     }
1448 }
1449 \f
/* Get the digit grouping data for thousand separators by calling
   localeconv().  *SEP receives the separator string and *GROUPING
   the grouping description (a string of group sizes, as in the
   `grouping' member of struct lconv).  The data is determined on the
   first call and cached for all later calls.

   The strings are copied into static storage rather than cached as
   pointers into the localeconv() result, because those pointers may
   be invalidated by a later call to localeconv() or setlocale().

   In locales that don't set a thousand separator (such as the "C"
   locale), the separator is forced to "," -- or to "." when "," is
   the decimal point -- with groups of three digits.  The same
   defaults are used if the locale's strings do not fit the static
   buffers.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static char cached_sep[16];
  static char cached_grouping[16];
  static bool initialized;

  if (!initialized)
    {
      /* Get the grouping info from the locale. */
      const struct lconv *lconv = localeconv ();
      size_t seplen = strlen (lconv->thousands_sep);
      size_t grouplen = strlen (lconv->grouping);
      bool use_locale = (seplen > 0
                         && seplen < sizeof cached_sep
                         && grouplen < sizeof cached_grouping);
#if ! USE_NLS_PROGRESS_BAR
      /* We can't count column widths, so ensure that the separator
         is single-byte only (the defaults below pick the byte).  */
      if (seplen > 1)
        use_locale = false;
#endif
      if (use_locale)
        {
          /* The lengths were checked above; the +1 copies the \0.  */
          memcpy (cached_sep, lconv->thousands_sep, seplen + 1);
          memcpy (cached_grouping, lconv->grouping, grouplen + 1);
        }
      else
        {
          /* Many locales (such as "C" or "hr_HR") don't specify
             grouping, but we still want it for legibility.  Use ','
             as the separator, unless that character is the decimal
             point, in which case use '.'.  */
          cached_sep[0] = (*lconv->decimal_point != ',') ? ',' : '.';
          cached_sep[1] = '\0';
          cached_grouping[0] = 3;
          cached_grouping[1] = '\0';
        }
      initialized = true;
    }
  *sep = cached_sep;
  *grouping = cached_grouping;
}
1495
1496 /* Return a printed representation of N with thousand separators.
1497    This should respect locale settings, with the exception of the "C"
1498    locale which mandates no separator, but we use one anyway.
1499
1500    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1501    the separators because it's too non-portable, and it's hard to test
1502    for this feature at configure time.  Besides, it wouldn't display
1503    separators in the "C" locale, still used by many Unix users.  */
1504
1505 const char *
1506 with_thousand_seps (wgint n)
1507 {
1508   static char outbuf[48];
1509   char *p = outbuf + sizeof outbuf;
1510
1511   /* Info received from locale */
1512   const char *grouping, *sep;
1513   int seplen;
1514
1515   /* State information */
1516   int i = 0, groupsize;
1517   const char *atgroup;
1518
1519   bool negative = n < 0;
1520
1521   /* Initialize grouping data. */
1522   get_grouping_data (&sep, &grouping);
1523   seplen = strlen (sep);
1524   atgroup = grouping;
1525   groupsize = *atgroup++;
1526
1527   /* This would overflow on WGINT_MIN, but printing negative numbers
1528      is not an important goal of this fuinction.  */
1529   if (negative)
1530     n = -n;
1531
1532   /* Write the number into the buffer, backwards, inserting the
1533      separators as necessary.  */
1534   *--p = '\0';
1535   while (1)
1536     {
1537       *--p = n % 10 + '0';
1538       n /= 10;
1539       if (n == 0)
1540         break;
1541       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1542       if (++i == groupsize)
1543         {
1544           if (seplen == 1)
1545             *--p = *sep;
1546           else
1547             memcpy (p -= seplen, sep, seplen);
1548           i = 0;
1549           if (*atgroup)
1550             groupsize = *atgroup++;
1551         }
1552     }
1553   if (negative)
1554     *--p = '-';
1555
1556   return p;
1557 }
1558
1559 /* N, a byte quantity, is converted to a human-readable abberviated
1560    form a la sizes printed by `ls -lh'.  The result is written to a
1561    static buffer, a pointer to which is returned.
1562
1563    Unlike `with_thousand_seps', this approximates to the nearest unit.
1564    Quoting GNU libit: "Most people visually process strings of 3-4
1565    digits effectively, but longer strings of digits are more prone to
1566    misinterpretation.  Hence, converting to an abbreviated form
1567    usually improves readability."
1568
1569    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1570    original computer-related meaning of "powers of 1024".  We don't
1571    use the "*bibyte" names invented in 1998, and seldom used in
1572    practice.  Wikipedia's entry on "binary prefix" discusses this in
1573    some detail.  */
1574
1575 char *
1576 human_readable (HR_NUMTYPE n)
1577 {
1578   /* These suffixes are compatible with those of GNU `ls -lh'. */
1579   static char powers[] =
1580     {
1581       'K',                      /* kilobyte, 2^10 bytes */
1582       'M',                      /* megabyte, 2^20 bytes */
1583       'G',                      /* gigabyte, 2^30 bytes */
1584       'T',                      /* terabyte, 2^40 bytes */
1585       'P',                      /* petabyte, 2^50 bytes */
1586       'E',                      /* exabyte,  2^60 bytes */
1587     };
1588   static char buf[8];
1589   size_t i;
1590
1591   /* If the quantity is smaller than 1K, just print it. */
1592   if (n < 1024)
1593     {
1594       snprintf (buf, sizeof (buf), "%d", (int) n);
1595       return buf;
1596     }
1597
1598   /* Loop over powers, dividing N with 1024 in each iteration.  This
1599      works unchanged for all sizes of wgint, while still avoiding
1600      non-portable `long double' arithmetic.  */
1601   for (i = 0; i < countof (powers); i++)
1602     {
1603       /* At each iteration N is greater than the *subsequent* power.
1604          That way N/1024.0 produces a decimal number in the units of
1605          *this* power.  */
1606       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1607         {
1608           double val = n / 1024.0;
1609           /* Print values smaller than 10 with one decimal digits, and
1610              others without any decimals.  */
1611           snprintf (buf, sizeof (buf), "%.*f%c",
1612                     val < 10 ? 1 : 0, val, powers[i]);
1613           return buf;
1614         }
1615       n /= 1024;
1616     }
1617   return NULL;                  /* unreached */
1618 }
1619
1620 /* Count the digits in the provided number.  Used to allocate space
1621    when printing numbers.  */
1622
1623 int
1624 numdigit (wgint number)
1625 {
1626   int cnt = 1;
1627   if (number < 0)
1628     ++cnt;                      /* accomodate '-' */
1629   while ((number /= 10) != 0)
1630     ++cnt;
1631   return cnt;
1632 }
1633
1634 #define PR(mask) *p++ = n / (mask) + '0'
1635
1636 /* DIGITS_<D> is used to print a D-digit number and should be called
1637    with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1638    n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1639    Recursively this continues until DIGITS_1 is invoked.  */
1640
1641 #define DIGITS_1(mask) PR (mask)
1642 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1643 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1644 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1645 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1646 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1647 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1648 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1649 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1650 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1651
1652 /* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1653
1654 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1655 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1656 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1657 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1658 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1659 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1660 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1661 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1662 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1663
1664 /* Shorthand for casting to wgint. */
1665 #define W wgint
1666
1667 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1668    `sprintf(buffer, "%lld", (long long) number)', only typically much
1669    faster and portable to machines without long long.
1670
1671    The speedup may make a difference in programs that frequently
1672    convert numbers to strings.  Some implementations of sprintf,
1673    particularly the one in some versions of GNU libc, have been known
1674    to be quite slow when converting integers to strings.
1675
1676    Return the pointer to the location where the terminating zero was
1677    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1678    function is done.)
1679
1680    BUFFER should be large enough to accept as many bytes as you expect
1681    the number to take up.  On machines with 64-bit wgints the maximum
1682    needed size is 24 bytes.  That includes the digits needed for the
1683    largest 64-bit number, the `-' sign in case it's negative, and the
1684    terminating '\0'.  */
1685
1686 char *
1687 number_to_string (char *buffer, wgint number)
1688 {
1689   char *p = buffer;
1690   wgint n = number;
1691
1692   int last_digit_char = 0;
1693
1694 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1695   /* We are running in a very strange environment.  Leave the correct
1696      printing to sprintf.  */
1697   p += sprintf (buf, "%j", (intmax_t) (n));
1698 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1699
1700   if (n < 0)
1701     {
1702       if (n < -WGINT_MAX)
1703         {
1704           /* n = -n would overflow because -n would evaluate to a
1705              wgint value larger than WGINT_MAX.  Need to make n
1706              smaller and handle the last digit separately.  */
1707           int last_digit = n % 10;
1708           /* The sign of n%10 is implementation-defined. */
1709           if (last_digit < 0)
1710             last_digit_char = '0' - last_digit;
1711           else
1712             last_digit_char = '0' + last_digit;
1713           /* After n is made smaller, -n will not overflow. */
1714           n /= 10;
1715         }
1716
1717       *p++ = '-';
1718       n = -n;
1719     }
1720
1721   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1722      way printing any N is fully open-coded without a loop or jump.
1723      (Also see description of DIGITS_*.)  */
1724
1725   if      (n < 10)                       DIGITS_1 (1);
1726   else if (n < 100)                      DIGITS_2 (10);
1727   else if (n < 1000)                     DIGITS_3 (100);
1728   else if (n < 10000)                    DIGITS_4 (1000);
1729   else if (n < 100000)                   DIGITS_5 (10000);
1730   else if (n < 1000000)                  DIGITS_6 (100000);
1731   else if (n < 10000000)                 DIGITS_7 (1000000);
1732   else if (n < 100000000)                DIGITS_8 (10000000);
1733   else if (n < 1000000000)               DIGITS_9 (100000000);
1734 #if SIZEOF_WGINT == 4
1735   /* wgint is 32 bits wide: no number has more than 10 digits. */
1736   else                                   DIGITS_10 (1000000000);
1737 #else
1738   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1739      Constants are constructed by compile-time multiplication to avoid
1740      dealing with different notations for 64-bit constants
1741      (nL/nLL/nI64, depending on the compiler and architecture).  */
1742   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1743   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1744   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1745   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1746   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1747   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1748   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1749   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1750   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1751   else                                   DIGITS_19 (1000000000*(W)1000000000);
1752 #endif
1753
1754   if (last_digit_char)
1755     *p++ = last_digit_char;
1756
1757   *p = '\0';
1758 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1759
1760   return p;
1761 }
1762
1763 #undef PR
1764 #undef W
1765 #undef SPRINTF_WGINT
1766 #undef DIGITS_1
1767 #undef DIGITS_2
1768 #undef DIGITS_3
1769 #undef DIGITS_4
1770 #undef DIGITS_5
1771 #undef DIGITS_6
1772 #undef DIGITS_7
1773 #undef DIGITS_8
1774 #undef DIGITS_9
1775 #undef DIGITS_10
1776 #undef DIGITS_11
1777 #undef DIGITS_12
1778 #undef DIGITS_13
1779 #undef DIGITS_14
1780 #undef DIGITS_15
1781 #undef DIGITS_16
1782 #undef DIGITS_17
1783 #undef DIGITS_18
1784 #undef DIGITS_19
1785
1786 #define RING_SIZE 3
1787
1788 /* Print NUMBER to a statically allocated string and return a pointer
1789    to the printed representation.
1790
1791    This function is intended to be used in conjunction with printf.
1792    It is hard to portably print wgint values:
1793     a) you cannot use printf("%ld", number) because wgint can be long
1794        long on 32-bit machines with LFS.
1795     b) you cannot use printf("%lld", number) because NUMBER could be
1796        long on 32-bit machines without LFS, or on 64-bit machines,
1797        which do not require LFS.  Also, Windows doesn't support %lld.
1798     c) you cannot use printf("%j", (int_max_t) number) because not all
1799        versions of printf support "%j", the most notable being the one
1800        on Windows.
1801     d) you cannot #define WGINT_FMT to the appropriate format and use
1802        printf(WGINT_FMT, number) because that would break translations
1803        for user-visible messages, such as printf("Downloaded: %d
1804        bytes\n", number).
1805
1806    What you should use instead is printf("%s", number_to_static_string
1807    (number)).
1808
1809    CAVEAT: since the function returns pointers to static data, you
1810    must be careful to copy its result before calling it again.
1811    However, to make it more useful with printf, the function maintains
1812    an internal ring of static buffers to return.  That way things like
1813    printf("%s %s", number_to_static_string (num1),
1814    number_to_static_string (num2)) work as expected.  Three buffers
1815    are currently used, which means that "%s %s %s" will work, but "%s
1816    %s %s %s" won't.  If you need to print more than three wgints,
1817    bump the RING_SIZE (or rethink your message.)  */
1818
1819 char *
1820 number_to_static_string (wgint number)
1821 {
1822   static char ring[RING_SIZE][24];
1823   static int ringpos;
1824   char *buf = ring[ringpos];
1825   number_to_string (buf, number);
1826   ringpos = (ringpos + 1) % RING_SIZE;
1827   return buf;
1828 }
1829 \f
/* Determine the width, in columns, of the terminal we're running on.
   Return 0 if it cannot be determined.

   Where TIOCGWINSZ is defined, stderr is queried with ioctl(), unless
   opt.lfilename is set, in which case 0 is returned without asking.
   On WINDOWS the console attached to the standard error handle is
   queried.  Everywhere else the result is 0.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO csbi;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
    return csbi.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1859 \f
/* Nonzero once the random number generator (either rand or
   [dl]rand48) has been seeded.  */
static int rnd_seeded;

/* Return a random number in the range [0, MAX-1].

   The generator is seeded from the current time and the process id
   the first time it is needed.

   lrand48 is used where available.  Elsewhere the result is derived
   by scaling a single rand() value, so no more than RAND_MAX+1
   distinct values can ever be returned, however large MAX is.

   DO NOT use this for cryptography.  It is only meant for situations
   where the quality of the random numbers doesn't really matter.  */

int
random_number (int max)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      rnd_seeded = 1;
      srand48 ((long) time (NULL) ^ (long) getpid ());
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */
  if (rnd_seeded == 0)
    {
      rnd_seeded = 1;
      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
    }

  /* Equivalent in spirit to rand() % max, except that the result is
     taken from the high-order bits, which are of better quality when
     rand() is a simple congruential generator.  */
  return (int) ((double) max * rand () / (RAND_MAX + 1.0));
#endif /* not HAVE_DRAND48 */
}
1906
/* Return a random floating point number in the [0, 1) range.  drand48
   is used where available; elsewhere the value is pieced together
   from four random_number (10000) draws, each one supplying the next
   four decimal digits.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      rnd_seeded = 1;
      srand48 ((long) time (NULL) ^ (long) getpid ());
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  double result = 0;
  double divisor = 10000.0;
  int group;

  /* Each pass contributes a group of four decimal digits that sits
     four places further to the right than the previous one.  */
  for (group = 0; group < 4; group++)
    {
      result += random_number (10000) / divisor;
      divisor *= 10000.0;
    }
  return result;
#endif /* not HAVE_DRAND48 */
}
1928 \f
1929 /* Implementation of run_with_timeout, a generic timeout-forcing
1930    routine for systems with Unix-like signal handling.  */
1931
1932 #ifdef USE_SIGNAL_TIMEOUT
1933 # ifdef HAVE_SIGSETJMP
1934 #  define SETJMP(env) sigsetjmp (env, 1)
1935
1936 static sigjmp_buf run_with_timeout_env;
1937
1938 static void
1939 abort_run_with_timeout (int sig)
1940 {
1941   assert (sig == SIGALRM);
1942   siglongjmp (run_with_timeout_env, -1);
1943 }
1944 # else /* not HAVE_SIGSETJMP */
1945 #  define SETJMP(env) setjmp (env)
1946
1947 static jmp_buf run_with_timeout_env;
1948
1949 static void
1950 abort_run_with_timeout (int sig)
1951 {
1952   assert (sig == SIGALRM);
1953   /* We don't have siglongjmp to preserve the set of blocked signals;
1954      if we longjumped out of the handler at this point, SIGALRM would
1955      remain blocked.  We must unblock it manually. */
1956   sigset_t set;
1957   sigemptyset (&set);
1958   sigaddset (&set, SIGALRM);
1959   sigprocmask (SIG_BLOCK, &set, NULL);
1960
1961   /* Now it's safe to longjump. */
1962   longjmp (run_with_timeout_env, -1);
1963 }
1964 # endif /* not HAVE_SIGSETJMP */
1965
/* Schedule SIGALRM to be delivered TIMEOUT seconds from now, using
   setitimer where ITIMER_REAL is defined and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would be
   rounded to zero is bumped to the smallest value the interface can
   express (1us for setitimer, 1s for alarm), so that the signal is
   always delivered.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  const long whole = (long) timeout;
  struct itimerval itv;

  xzero (itv);
  itv.it_value.tv_sec = whole;
  itv.it_value.tv_usec = 1000000 * (timeout - whole);

  /* An all-zero value would mean "wait forever", so make sure at
     least the minimum interval is requested.  */
  if (!itv.it_value.tv_sec && !itv.it_value.tv_usec)
    itv.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  Timeouts below one second are
     rounded up to 1 rather than down to 0, because alarm(0) means
     "never deliver the alarm", which is not what someone asking for
     a 0.5s timeout would expect.  */
  int secs = (int) timeout;

  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
2000
/* Disarm the timer started by alarm_set, through the same interface
   (setitimer or alarm) that alarm_set uses.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* An all-zero itimerval turns the timer off.  */
  struct itimerval off;

  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2014
2015 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2016    seconds.  Returns true if the function was interrupted with a
2017    timeout, false otherwise.
2018
2019    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2020    using setitimer() or alarm().  The timeout is enforced by
2021    longjumping out of the SIGALRM handler.  This has several
2022    advantages compared to the traditional approach of relying on
2023    signals causing system calls to exit with EINTR:
2024
2025      * The callback function is *forcibly* interrupted after the
2026        timeout expires, (almost) regardless of what it was doing and
2027        whether it was in a syscall.  For example, a calculation that
2028        takes a long time is interrupted as reliably as an IO
2029        operation.
2030
2031      * It works with both SYSV and BSD signals because it doesn't
2032        depend on the default setting of SA_RESTART.
2033
2034      * It doesn't require special handler setup beyond a simple call
2035        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2036        optional.)
2037
2038    The only downside is that, if FUN allocates internal resources that
2039    are normally freed prior to exit from the functions, they will be
2040    lost in case of timeout.  */
2041
2042 bool
2043 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2044 {
2045   int saved_errno;
2046
2047   if (timeout == 0)
2048     {
2049       fun (arg);
2050       return false;
2051     }
2052
2053   signal (SIGALRM, abort_run_with_timeout);
2054   if (SETJMP (run_with_timeout_env) != 0)
2055     {
2056       /* Longjumped out of FUN with a timeout. */
2057       signal (SIGALRM, SIG_DFL);
2058       return true;
2059     }
2060   alarm_set (timeout);
2061   fun (arg);
2062
2063   /* Preserve errno in case alarm() or signal() modifies it. */
2064   saved_errno = errno;
2065   alarm_cancel ();
2066   signal (SIGALRM, SIG_DFL);
2067   errno = saved_errno;
2068
2069   return false;
2070 }
2071
2072 #else  /* not USE_SIGNAL_TIMEOUT */
2073
#ifndef WINDOWS
/* Stub run_with_timeout for builds without signal-based timeouts: it
   simply calls FUN(ARG), ignores TIMEOUT, and always reports that no
   timeout occurred.  It is not defined under Windows, because Windows
   has its own version of run_with_timeout that uses threads.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;               /* deliberately unused */
  (*fun) (arg);
  return false;
}
#endif /* not WINDOWS */
2086 #endif /* not USE_SIGNAL_TIMEOUT */
2087 \f
2088 #ifndef WINDOWS
2089
/* Sleep for SECONDS seconds; fractions of a second are honored.  On
   machines without nanosleep(), the sleep may end early when a signal
   interrupts it.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface because it offers high
     accuracy and, more importantly, because it reports how much time
     was left when a signal such as SIGWINCH interrupted it, so the
     sleep can be reliably resumed.  (There was an actual Debian bug
     report about --limit-rate malfunctioning while the terminal was
     being resized.)  */
  struct timespec request, left;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);
  for (;;)
    {
      if (nanosleep (&request, &left) >= 0 || errno != EINTR)
        break;
      /* Interrupted by a signal: go back to sleep for what is left.  */
      request = left;
    }
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* On some systems, usleep cannot handle values larger than
         1,000,000.  Spend the whole seconds in sleep and leave only
         the subsecond remainder to usleep.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval tv;

  tv.tv_sec = (long) seconds;
  tv.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &tv);
  /* A return of -1 with errno set to EINTR means a signal interrupted
     the sleep.  Without knowing how long we have actually slept we
     can't resume, and tracking sleeps with gettimeofday is slow and
     unreliable due to clock skew.  */
#endif
}
2135
2136 #endif /* not WINDOWS */
2137
/* Encode the octets in DATA of length LENGTH to base64 format,
   storing the result to DEST.  The output will be zero-terminated,
   and DEST must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes.  The function returns the length of
   the resulting base64 data, not counting the terminating zero.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  char *p = dest;
  /* Number of input bytes not yet encoded.  Counting the remaining
     bytes, instead of comparing against DATA + LENGTH - 2, avoids
     forming a pointer before the start of DATA for short inputs.  */
  int left;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (left = length; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing bytes, padding the quadruplet
     with '='...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* Nothing left over; no padding needed.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (int) (p - dest);
}
2194
/* Fetch into C the next character of the string at P that is not
   whitespace, advancing P past it.  C is \0 once the end of the
   string has been reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode data from BASE64 (a null-terminated string) into memory
   pointed to by DEST.  DEST is assumed to be large enough to
   accommodate the decoded data, which is guaranteed to be no more
   than 3/4*strlen(base64).  Whitespace in the input is skipped.

   Since DEST is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the data
   written to DEST.  -1 is returned in case of error caused by
   malformed base64 input.

   This function originates from Free Recode.  */

int
base64_decode (const char *base64, void *dest)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  Entries of -1
     mark characters that are not base64 digits; that includes \0 and
     the padding character `='.  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
  /* True for the 64 data characters only; false for `=', for \0 and
     for everything else.  */
#define IS_BASE64_DIGIT(c) (IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0)

  const char *in = base64;
  char *out = dest;

  for (;;)
    {
      unsigned char c;
      unsigned long bits;       /* the 24 bits of the current quadruplet */

      /* First character of a quadruplet.  The end of the string is
         acceptable here and only here.  */
      NEXT_CHAR (c, in);
      if (c == '\0')
        break;
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* illegal char while decoding base64 */
      bits = BASE64_CHAR_TO_VALUE (c) << 18;

      /* Second character: must be a data character.  */
      NEXT_CHAR (c, in);
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (c) << 12;
      *out++ = bits >> 16;

      /* Third character: either a data character or the first of two
         padding characters.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        {
          NEXT_CHAR (c, in);
          if (c != '=')
            return -1;          /* premature EOF, or second `=' missing */
          continue;
        }
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (c) << 6;
      *out++ = 0xff & (bits >> 8);

      /* Fourth character: either a data character or padding.  */
      NEXT_CHAR (c, in);
      if (c == '=')
        continue;
      if (!IS_BASE64_DIGIT (c))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (c);
      *out++ = 0xff & bits;
    }
#undef IS_BASE64_DIGIT
#undef BASE64_CHAR_TO_VALUE

  return out - (char *) dest;
}

#undef IS_ASCII
#undef NEXT_CHAR
2304 \f
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements of BASE with indices FROM through TO, both
   inclusive.  Each element is SIZE bytes long.  TEMP is scratch
   space that must be able to hold elements at the same indices as
   BASE.  CMPFUN compares two elements like a qsort comparator.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      size_t mid = (to + from) / 2;

      /* Sort both halves, then merge them into TEMP.  */
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      for (k = from; (i <= mid) && (j <= to); k++)
        /* On ties the element from the left half goes first, which
           is what keeps the sort stable.  */
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      /* Copy whatever is left over in either half.  */
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* Move the merged run back into place.  */
      for (k = from; k <= to; k++)
        memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage of
   NMEMB * SIZE bytes with alloca, i.e. on the stack, so this is not
   suited to very large arrays.

   Arrays with fewer than two elements are left untouched.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* The guard must be on the element count: with NMEMB == 0 the
     expression NMEMB - 1 below would wrap around to SIZE_MAX.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2350 \f
/* Format NUMBER as a short decimal string.  Magnitudes of ten and
   above are rounded to an integer.  Smaller ones are shown with one
   significant digit, without trailing zeros and with no more than
   three fractional digits.  For example, 0.1 is printed as "0.1",
   0.0091 as "0.009", and 0.0003 as simply "0".

   This is useful for displaying durations because it provides
   order-of-magnitude information without unnecessary clutter --
   long-running downloads are shown without the fractional part, and
   short ones still retain one significant digit.

   The result lives in a static buffer that the next call
   overwrites.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  double magnitude = number >= 0 ? number : -number;

  if (!(magnitude >= 0.0005))
    /* print numbers close to 0 as 0, not 0.000 */
    strcpy (buf, "0");
  else if (magnitude < 0.001)
    /* round [0.0005, 0.001) to 0.001 */
    snprintf (buf, sizeof buf, "%.3f", number);
  else if (magnitude < 0.95)
    snprintf (buf, sizeof buf, "%.1g", number);
  else if (magnitude < 9.95)
    /* The limit is 9.95 because %.1f would turn 9.96 into "10.0"
       instead of "10".  OTOH 9.94 will print as "9.9".  */
    snprintf (buf, sizeof buf, "%.1f", number);
  else
    snprintf (buf, sizeof buf, "%.0f", number);

  return buf;
}
2385
2386 #ifdef TESTING
2387
/* Unit test for subdir_p: run it over a table of directory pairs and
   compare against the expected answers.  Returns NULL when every case
   passes; a failing case makes mu_assert report the message below.  */

const char *
test_subdir_p()
{
  struct {
    char *d1;
    char *d2;
    bool result;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  size_t n;

  for (n = 0; n < countof(cases); n++)
    {
      bool actual = subdir_p (cases[n].d1, cases[n].d2);

      mu_assert ("test_subdir_p: wrong result",
                 actual == cases[n].result);
    }

  return NULL;
}
2412
/* Unit test for dir_matches_p: run it over a table of directory
   lists paired with a directory name and compare against the expected
   answers.  Returns NULL when every case passes; a failing case makes
   mu_assert report the message below.  */

const char *
test_dir_matches_p()
{
  struct {
    char *dirlist[3];
    char *dir;
    bool result;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  size_t n;

  for (n = 0; n < countof(cases); n++)
    {
      bool actual = dir_matches_p (cases[n].dirlist, cases[n].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 actual == cases[n].result);
    }

  return NULL;
}
2449
2450 #endif /* TESTING */
2451