sjero.net Git - wget/blob - src/utils.c

   1 /* Various utility functions.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   3    2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   4    Inc.
   5
   6 This file is part of GNU Wget.
   7
   8 GNU Wget is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Wget is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 Additional permission under GNU GPL version 3 section 7
  22
  23 If you modify this program, or any covered work, by linking or
  24 combining it with the OpenSSL project's OpenSSL library (or a
  25 modified version of that library), containing parts covered by the
  26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  27 grants you additional permission to convey the resulting work.
  28 Corresponding Source for a non-source form of such a combination
  29 shall include the source code for the parts of OpenSSL used as well
  30 as that of the covered work.  */
  31
  32 #include "wget.h"
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <unistd.h>
  39 #ifdef HAVE_MMAP
  40 # include <sys/mman.h>
  41 #endif
  42 #ifdef HAVE_PROCESS_H
  43 # include <process.h>  /* getpid() */
  44 #endif
  45 #include <errno.h>
  46 #include <fcntl.h>
  47 #include <assert.h>
  48 #include <stdarg.h>
  49 #include <locale.h>
  50
  51 #if HAVE_UTIME
  52 # include <sys/types.h>
  53 # ifdef HAVE_UTIME_H
  54 #  include <utime.h>
  55 # endif
  56
  57 # ifdef HAVE_SYS_UTIME_H
  58 #  include <sys/utime.h>
  59 # endif
  60 #endif
  61
  62 #include <sys/time.h>
  63
  64 #include <sys/stat.h>
  65
  66 /* For TIOCGWINSZ and friends: */
  67 #include <sys/ioctl.h>
  68 #ifdef HAVE_TERMIOS_H
  69 # include <termios.h>
  70 #endif
  71
  72 /* Needed for Unix version of run_with_timeout. */
  73 #include <signal.h>
  74 #include <setjmp.h>
  75
  76 #include <regex.h>
  77 #ifdef HAVE_LIBPCRE
  78 # include <pcre.h>
  79 #endif
  80
  81 #ifndef HAVE_SIGSETJMP
  82 /* If sigsetjmp is a macro, configure won't pick it up. */
  83 # ifdef sigsetjmp
  84 #  define HAVE_SIGSETJMP
  85 # endif
  86 #endif
  87
  88 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  89 # define USE_SIGNAL_TIMEOUT
  90 #endif
  91
  92 #include "utils.h"
  93 #include "hash.h"
  94
  95 #ifdef __VMS
  96 #include "vms.h"
  97 #endif /* def __VMS */
  98
  99 #ifdef TESTING
 100 #include "test.h"
 101 #endif
 102
 103 static void
 104 memfatal (const char *context, long attempted_size)
 105 {
 106   /* Make sure we don't try to store part of the log line, and thus
 107      call malloc.  */
 108   log_set_save_context (false);
 109
 110   /* We have different log outputs in different situations:
 111      1) output without bytes information
 112      2) output with bytes information  */
 113   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
 114     {
 115       logprintf (LOG_ALWAYS,
 116                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
 117                  exec_name, context);
 118     }
 119   else
 120     {
 121       logprintf (LOG_ALWAYS,
 122                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
 123                  exec_name, context, attempted_size);
 124     }
 125
 126   exit (1);
 127 }
 128
 129 /* Character property table for (re-)escaping VMS ODS5 extended file
 130    names.  Note that this table ignores Unicode.
 131
 132    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
 133
 134    ODS5 Invalid characters:
 135       C0 control codes (0x00 to 0x1F inclusive)
 136       Asterisk (*)
 137       Question mark (?)
 138
 139    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
 140       Double quotation marks (")
 141       Backslash (\)
 142       Colon (:)
 143       Left angle bracket (<)
 144       Right angle bracket (>)
 145       Slash (/)
 146       Vertical bar (|)
 147
 148    Characters escaped by "^":
 149       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
 150        @  [  \  ]  ^  `  {  |  }  ~
 151
 152    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
 153    case.  Note that un-escaped < and > can also confuse a directory
 154    spec.
 155
 156    Characters put out as ^xx:
 157       7F (DEL)
 158       80-9F (C1 control characters)
 159       A0 (nonbreaking space)
 160       FF (Latin small letter y diaeresis)
 161
 162    Other cases:
 163       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
 164
 165     Property table values:
 166       Normal escape:    1
 167       Space:            2
 168       Dot:              4
 169       Hex-hex escape:   8
 170       ODS2 normal:     16
 171       ODS2 lower case: 32
 172       Hex digit:       64
 173 */
 174
/* Indexed by character code (0-255).  Each entry is the bitwise OR of
   the property values listed above; for example 80 = 16 + 64 (ODS2
   normal, hex digit), 96 = 32 + 64 (ODS2 lower case, hex digit) and
   17 = 1 + 16 (normal escape, ODS2 normal).  */
unsigned char char_prop[ 256] = {

/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,

/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,

/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,

/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,

/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,

/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,

/* 0x80 - 0x8F: hex-hex escape (8) */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0x90 - 0x9F: hex-hex escape (8) */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0xA0 - 0xAF: only 0xA0 carries the hex-hex escape property */
    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0 - 0xBF */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xC0 - 0xCF */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xD0 - 0xDF */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xE0 - 0xEF */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0 - 0xFF: only 0xFF carries the hex-hex escape property */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
};
 210
 211 /* Utility function: like xstrdup(), but also lowercases S.  */
 212
 213 char *
 214 xstrdup_lower (const char *s)
 215 {
 216   char *copy = xstrdup (s);
 217   char *p = copy;
 218   for (; *p; p++)
 219     *p = c_tolower (*p);
 220   return copy;
 221 }
 222
 223 /* Copy the string formed by two pointers (one on the beginning, other
 224    on the char after the last char) to a new, malloc-ed location.
 225    0-terminate it.  */
 226 char *
 227 strdupdelim (const char *beg, const char *end)
 228 {
 229   char *res = xmalloc (end - beg + 1);
 230   memcpy (res, beg, end - beg);
 231   res[end - beg] = '\0';
 232   return res;
 233 }
 234
 235 /* Parse a string containing comma-separated elements, and return a
 236    vector of char pointers with the elements.  Spaces following the
 237    commas are ignored.  */
 238 char **
 239 sepstring (const char *s)
 240 {
 241   char **res;
 242   const char *p;
 243   int i = 0;
 244
 245   if (!s || !*s)
 246     return NULL;
 247   res = NULL;
 248   p = s;
 249   while (*s)
 250     {
 251       if (*s == ',')
 252         {
 253           res = xrealloc (res, (i + 2) * sizeof (char *));
 254           res[i] = strdupdelim (p, s);
 255           res[++i] = NULL;
 256           ++s;
 257           /* Skip the blanks following the ','.  */
 258           while (c_isspace (*s))
 259             ++s;
 260           p = s;
 261         }
 262       else
 263         ++s;
 264     }
 265   res = xrealloc (res, (i + 2) * sizeof (char *));
 266   res[i] = strdupdelim (p, s);
 267   res[i + 1] = NULL;
 268   return res;
 269 }
 270 \f
 271 /* Like sprintf, but prints into a string of sufficient size freshly
 272    allocated with malloc, which is returned.  If unable to print due
 273    to invalid format, returns NULL.  Inability to allocate needed
 274    memory results in abort, as with xmalloc.  This is in spirit
 275    similar to the GNU/BSD extension asprintf, but somewhat easier to
 276    use.
 277
 278    Internally the function either calls vasprintf or loops around
 279    vsnprintf until the correct size is found.  Since Wget also ships a
 280    fallback implementation of vsnprintf, this should be portable.  */
 281
 282 /* Constant is using for limits memory allocation for text buffer.
 283    Applicable in situation when: vasprintf is not available in the system
 284    and vsnprintf return -1 when long line is truncated (in old versions of
 285    glibc and in other system where C99 doesn`t support) */
 286
 287 #define FMT_MAX_LENGTH 1048576
 288
 289 char *
 290 aprintf (const char *fmt, ...)
 291 {
 292 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
 293   /* Use vasprintf. */
 294   int ret;
 295   va_list args;
 296   char *str;
 297   va_start (args, fmt);
 298   ret = vasprintf (&str, fmt, args);
 299   va_end (args);
 300   if (ret < 0 && errno == ENOMEM)
 301     memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
 302                                                       with xmalloc/xrealloc */
 303   else if (ret < 0)
 304     return NULL;
 305   return str;
 306 #else  /* not HAVE_VASPRINTF */
 307
 308   /* vasprintf is unavailable.  snprintf into a small buffer and
 309      resize it as necessary. */
 310   int size = 32;
 311   char *str = xmalloc (size);
 312
 313   /* #### This code will infloop and eventually abort in xrealloc if
 314      passed a FMT that causes snprintf to consistently return -1.  */
 315
 316   while (1)
 317     {
 318       int n;
 319       va_list args;
 320
 321       va_start (args, fmt);
 322       n = vsnprintf (str, size, fmt, args);
 323       va_end (args);
 324
 325       /* If the printing worked, return the string. */
 326       if (n > -1 && n < size)
 327         return str;
 328
 329       /* Else try again with a larger buffer. */
 330       if (n > -1)               /* C99 */
 331         size = n + 1;           /* precisely what is needed */
 332       else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
 333         {                               /* maybe we have some wrong
 334                                            format string? */
 335           logprintf (LOG_ALWAYS,
 336                      _("%s: aprintf: text buffer is too big (%ld bytes), "
 337                        "aborting.\n"),
 338                      exec_name, size);  /* printout a log message */
 339           abort ();                     /* and abort... */
 340         }
 341       else
 342         {
 343           /* else, we continue to grow our
 344            * buffer: Twice the old size. */
 345           size <<= 1;
 346         }
 347       str = xrealloc (str, size);
 348     }
 349 #endif /* not HAVE_VASPRINTF */
 350 }
 351
 352 /* Concatenate the NULL-terminated list of string arguments into
 353    freshly allocated space.  */
 354
 355 char *
 356 concat_strings (const char *str0, ...)
 357 {
 358   va_list args;
 359   int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
 360   char *ret, *p;
 361
 362   const char *next_str;
 363   int total_length = 0;
 364   size_t argcount;
 365
 366   /* Calculate the length of and allocate the resulting string. */
 367
 368   argcount = 0;
 369   va_start (args, str0);
 370   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 371     {
 372       int len = strlen (next_str);
 373       if (argcount < countof (saved_lengths))
 374         saved_lengths[argcount++] = len;
 375       total_length += len;
 376     }
 377   va_end (args);
 378   p = ret = xmalloc (total_length + 1);
 379
 380   /* Copy the strings into the allocated space. */
 381
 382   argcount = 0;
 383   va_start (args, str0);
 384   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 385     {
 386       int len;
 387       if (argcount < countof (saved_lengths))
 388         len = saved_lengths[argcount++];
 389       else
 390         len = strlen (next_str);
 391       memcpy (p, next_str, len);
 392       p += len;
 393     }
 394   va_end (args);
 395   *p = '\0';
 396
 397   return ret;
 398 }
 399 \f
 400 /* Format the provided time according to the specified format.  The
 401    format is a string with format elements supported by strftime.  */
 402
 403 static char *
 404 fmttime (time_t t, const char *fmt)
 405 {
 406   static char output[32];
 407   struct tm *tm = localtime(&t);
 408   if (!tm)
 409     abort ();
 410   if (!strftime(output, sizeof(output), fmt, tm))
 411     abort ();
 412   return output;
 413 }
 414
 415 /* Return pointer to a static char[] buffer in which zero-terminated
 416    string-representation of TM (in form hh:mm:ss) is printed.
 417
 418    If TM is NULL, the current time will be used.  */
 419
 420 char *
 421 time_str (time_t t)
 422 {
 423   return fmttime(t, "%H:%M:%S");
 424 }
 425
 426 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 427
 428 char *
 429 datetime_str (time_t t)
 430 {
 431   return fmttime(t, "%Y-%m-%d %H:%M:%S");
 432 }
 433 \f
 434 /* The Windows versions of the following two functions are defined in
 435    mswindows.c. On MSDOS this function should never be called. */
 436
 437 #ifdef __VMS
 438
 439 void
 440 fork_to_background (void)
 441 {
 442   return;
 443 }
 444
 445 #else /* def __VMS */
 446
 447 #if !defined(WINDOWS) && !defined(MSDOS)
 448 void
 449 fork_to_background (void)
 450 {
 451   pid_t pid;
 452   /* Whether we arrange our own version of opt.lfilename here.  */
 453   bool logfile_changed = false;
 454
 455   if (!opt.lfilename && (!opt.quiet || opt.server_response))
 456     {
 457       /* We must create the file immediately to avoid either a race
 458          condition (which arises from using unique_name and failing to
 459          use fopen_excl) or lying to the user about the log file name
 460          (which arises from using unique_name, printing the name, and
 461          using fopen_excl later on.)  */
 462       FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
 463       if (new_log_fp)
 464         {
 465           logfile_changed = true;
 466           fclose (new_log_fp);
 467         }
 468     }
 469   pid = fork ();
 470   if (pid < 0)
 471     {
 472       /* parent, error */
 473       perror ("fork");
 474       exit (1);
 475     }
 476   else if (pid != 0)
 477     {
 478       /* parent, no error */
 479       printf (_("Continuing in background, pid %d.\n"), (int) pid);
 480       if (logfile_changed)
 481         printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
 482       exit (0);                 /* #### should we use _exit()? */
 483     }
 484
 485   /* child: give up the privileges and keep running. */
 486   setsid ();
 487   if (freopen ("/dev/null", "r", stdin) == NULL)
 488     DEBUGP (("Failed to redirect stdin to /dev/null.\n"));
 489   if (freopen ("/dev/null", "w", stdout) == NULL)
 490     DEBUGP (("Failed to redirect stdout to /dev/null.\n"));
 491   if (freopen ("/dev/null", "w", stderr) == NULL)
 492     DEBUGP (("Failed to redirect stderr to /dev/null.\n"));
 493 }
 494 #endif /* !WINDOWS && !MSDOS */
 495
 496 #endif /* def __VMS [else] */
 497
 498 \f
 499 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
 500    specified with TM.  The atime ("access time") is set to the current
 501    time.  */
 502
 503 void
 504 touch (const char *file, time_t tm)
 505 {
 506 #if HAVE_UTIME
 507 # ifdef HAVE_STRUCT_UTIMBUF
 508   struct utimbuf times;
 509 # else
 510   struct {
 511     time_t actime;
 512     time_t modtime;
 513   } times;
 514 # endif
 515   times.modtime = tm;
 516   times.actime = time (NULL);
 517   if (utime (file, &times) == -1)
 518     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 519 #else
 520   struct timespec timespecs[2];
 521   int fd;
 522
 523   fd = open (file, O_WRONLY);
 524   if (fd < 0)
 525     {
 526       logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
 527       return;
 528     }
 529
 530   timespecs[0].tv_sec = time (NULL);
 531   timespecs[0].tv_nsec = 0L;
 532   timespecs[1].tv_sec = tm;
 533   timespecs[1].tv_nsec = 0L;
 534
 535   if (futimens (fd, timespecs) == -1)
 536     logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
 537
 538   close (fd);
 539 #endif
 540 }
 541
 542 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 543    nothing under MS-Windows.  */
 544 int
 545 remove_link (const char *file)
 546 {
 547   int err = 0;
 548   struct_stat st;
 549
 550   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 551     {
 552       DEBUGP (("Unlinking %s (symlink).\n", file));
 553       err = unlink (file);
 554       if (err != 0)
 555         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
 556                    quote (file), strerror (errno));
 557     }
 558   return err;
 559 }
 560
 561 /* Does FILENAME exist?  This is quite a lousy implementation, since
 562    it supplies no error codes -- only a yes-or-no answer.  Thus it
 563    will return that a file does not exist if, e.g., the directory is
 564    unreadable.  I don't mind it too much currently, though.  The
 565    proper way should, of course, be to have a third, error state,
 566    other than true/false, but that would introduce uncalled-for
 567    additional complexity to the callers.  */
 568 bool
 569 file_exists_p (const char *filename)
 570 {
 571 #ifdef HAVE_ACCESS
 572   return access (filename, F_OK) >= 0;
 573 #else
 574   struct_stat buf;
 575   return stat (filename, &buf) >= 0;
 576 #endif
 577 }
 578
 579 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 580    Returns 0 on error.  */
 581 bool
 582 file_non_directory_p (const char *path)
 583 {
 584   struct_stat buf;
 585   /* Use lstat() rather than stat() so that symbolic links pointing to
 586      directories can be identified correctly.  */
 587   if (lstat (path, &buf) != 0)
 588     return false;
 589   return S_ISDIR (buf.st_mode) ? false : true;
 590 }
 591
 592 /* Return the size of file named by FILENAME, or -1 if it cannot be
 593    opened or seeked into. */
 594 wgint
 595 file_size (const char *filename)
 596 {
 597 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
 598   wgint size;
 599   /* We use fseek rather than stat to determine the file size because
 600      that way we can also verify that the file is readable without
 601      explicitly checking for permissions.  Inspired by the POST patch
 602      by Arnaud Wylie.  */
 603   FILE *fp = fopen (filename, "rb");
 604   if (!fp)
 605     return -1;
 606   fseeko (fp, 0, SEEK_END);
 607   size = ftello (fp);
 608   fclose (fp);
 609   return size;
 610 #else
 611   struct_stat st;
 612   if (stat (filename, &st) < 0)
 613     return -1;
 614   return st.st_size;
 615 #endif
 616 }
 617
 618 /* 2005-02-19 SMS.
 619    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
 620    original name.  With the VMS file systems' versioning, everything
 621    should be fine, and appending ".NN" just causes trouble.
 622 */
 623
 624 #ifdef UNIQ_SEP
 625
 626 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
 627    doesn't exist is found.  Return a freshly allocated copy of the
 628    unused file name.  */
 629
 630 static char *
 631 unique_name_1 (const char *prefix)
 632 {
 633   int count = 1;
 634   int plen = strlen (prefix);
 635   char *template = (char *)alloca (plen + 1 + 24);
 636   char *template_tail = template + plen;
 637
 638   memcpy (template, prefix, plen);
 639   *template_tail++ = UNIQ_SEP;
 640
 641   do
 642     number_to_string (template_tail, count++);
 643   while (file_exists_p (template));
 644
 645   return xstrdup (template);
 646 }
 647
 648 /* Return a unique file name, based on FILE.
 649
 650    More precisely, if FILE doesn't exist, it is returned unmodified.
 651    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
 652    file name that doesn't exist is returned.
 653
 654    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
 655
 656    The resulting file is not created, only verified that it didn't
 657    exist at the point in time when the function was called.
 658    Therefore, where security matters, don't rely that the file created
 659    by this function exists until you open it with O_EXCL or
 660    equivalent.
 661
 662    If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
 663    string.  Otherwise, it may return FILE if the file doesn't exist
 664    (and therefore doesn't need changing).  */
 665
 666 char *
 667 unique_name (const char *file, bool allow_passthrough)
 668 {
 669   /* If the FILE itself doesn't exist, return it without
 670      modification. */
 671   if (!file_exists_p (file))
 672     return allow_passthrough ? (char *)file : xstrdup (file);
 673
 674   /* Otherwise, find a numeric suffix that results in unused file name
 675      and return it.  */
 676   return unique_name_1 (file);
 677 }
 678
 679 #else /* def UNIQ_SEP */
 680
 681 /* Dummy unique_name() for VMS.  Return the original name as easily as
 682    possible.
 683 */
 684 char *
 685 unique_name (const char *file, bool allow_passthrough)
 686 {
 687   /* Return the FILE itself, without modification, irregardful. */
 688   return allow_passthrough ? (char *)file : xstrdup (file);
 689 }
 690
 691 #endif /* def UNIQ_SEP [else] */
 692
 693 /* Create a file based on NAME, except without overwriting an existing
 694    file with that name.  Providing O_EXCL is correctly implemented,
 695    this function does not have the race condition associated with
 696    opening the file returned by unique_name.  */
 697
 698 FILE *
 699 unique_create (const char *name, bool binary, char **opened_name)
 700 {
 701   /* unique file name, based on NAME */
 702   char *uname = unique_name (name, false);
 703   FILE *fp;
 704   while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
 705     {
 706       xfree (uname);
 707       uname = unique_name (name, false);
 708     }
 709   if (opened_name)
 710     {
 711       if (fp)
 712         *opened_name = uname;
 713       else
 714         {
 715           *opened_name = NULL;
 716           xfree (uname);
 717         }
 718     }
 719   else
 720     xfree (uname);
 721   return fp;
 722 }
 723
 724 /* Open the file for writing, with the addition that the file is
 725    opened "exclusively".  This means that, if the file already exists,
 726    this function will *fail* and errno will be set to EEXIST.  If
 727    BINARY is set, the file will be opened in binary mode, equivalent
 728    to fopen's "wb".
 729
 730    If opening the file fails for any reason, including the file having
 731    previously existed, this function returns NULL and sets errno
 732    appropriately.  */
 733
 734 FILE *
 735 fopen_excl (const char *fname, int binary)
 736 {
 737   int fd;
 738 #ifdef O_EXCL
 739
 740 /* 2005-04-14 SMS.
 741    VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
 742    It also has file versions which obviate all the O_EXCL effort.
 743    O_TRUNC (something of a misnomer) requests a new version.
 744 */
 745 # ifdef __VMS
 746 /* Common open() optional arguments:
 747    sequential access only, access callback function.
 748 */
 749 #  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
 750
 751   int open_id;
 752   int flags = O_WRONLY | O_CREAT | O_TRUNC;
 753
 754   if (binary > 1)
 755     {
 756       open_id = 11;
 757       fd = open( fname,                 /* File name. */
 758        flags,                           /* Flags. */
 759        0777,                            /* Mode for default protection. */
 760        "ctx=bin,stm",                   /* Binary, stream access. */
 761        "rfm=stmlf",                     /* Stream_LF. */
 762        OPEN_OPT_ARGS);                  /* Access callback. */
 763     }
 764   else if (binary)
 765     {
 766       open_id = 12;
 767       fd = open( fname,                 /* File name. */
 768        flags,                           /* Flags. */
 769        0777,                            /* Mode for default protection. */
 770        "ctx=bin,stm",                   /* Binary, stream access. */
 771        "rfm=fix",                       /* Fixed-length, */
 772        "mrs=512",                       /* 512-byte records. */
 773        OPEN_OPT_ARGS);                  /* Access callback. */
 774     }
 775   else
 776     {
 777       open_id = 13;
 778       fd = open( fname,                 /* File name. */
 779        flags,                           /* Flags. */
 780        0777,                            /* Mode for default protection. */
 781        "rfm=stmlf",                     /* Stream_LF. */
 782        OPEN_OPT_ARGS);                  /* Access callback. */
 783     }
 784 # else /* def __VMS */
 785   int flags = O_WRONLY | O_CREAT | O_EXCL;
 786 # ifdef O_BINARY
 787   if (binary)
 788     flags |= O_BINARY;
 789 # endif
 790   fd = open (fname, flags, 0666);
 791 # endif /* def __VMS [else] */
 792
 793   if (fd < 0)
 794     return NULL;
 795   return fdopen (fd, binary ? "wb" : "w");
 796 #else  /* not O_EXCL */
 797   /* Manually check whether the file exists.  This is prone to race
 798      conditions, but systems without O_EXCL haven't deserved
 799      better.  */
 800   if (file_exists_p (fname))
 801     {
 802       errno = EEXIST;
 803       return NULL;
 804     }
 805   return fopen (fname, binary ? "wb" : "w");
 806 #endif /* not O_EXCL */
 807 }
 808 \f
 809 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 810    are missing, create them first.  In case any mkdir() call fails,
 811    return its error status.  Returns 0 on successful completion.
 812
 813    The behaviour of this function should be identical to the behaviour
 814    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 815 int
 816 make_directory (const char *directory)
 817 {
 818   int i, ret, quit = 0;
 819   char *dir;
 820
 821   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 822      function is unsafe if called with a read-only char *argument.  */
 823   STRDUP_ALLOCA (dir, directory);
 824
 825   /* If the first character of dir is '/', skip it (and thus enable
 826      creation of absolute-pathname directories.  */
 827   for (i = (*dir == '/'); 1; ++i)
 828     {
 829       for (; dir[i] && dir[i] != '/'; i++)
 830         ;
 831       if (!dir[i])
 832         quit = 1;
 833       dir[i] = '\0';
 834       /* Check whether the directory already exists.  Allow creation of
 835          of intermediate directories to fail, as the initial path components
 836          are not necessarily directories!  */
 837       if (!file_exists_p (dir))
 838         ret = mkdir (dir, 0777);
 839       else
 840         ret = 0;
 841       if (quit)
 842         break;
 843       else
 844         dir[i] = '/';
 845     }
 846   return ret;
 847 }
 848
 849 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 850    should be a file name.
 851
 852    file_merge("/foo/bar", "baz")  => "/foo/baz"
 853    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 854    file_merge("foo", "bar")       => "bar"
 855
 856    In other words, it's a simpler and gentler version of uri_merge.  */
 857
 858 char *
 859 file_merge (const char *base, const char *file)
 860 {
 861   char *result;
 862   const char *cut = (const char *)strrchr (base, '/');
 863
 864   if (!cut)
 865     return xstrdup (file);
 866
 867   result = xmalloc (cut - base + 1 + strlen (file) + 1);
 868   memcpy (result, base, cut - base);
 869   result[cut - base] = '/';
 870   strcpy (result + (cut - base) + 1, file);
 871
 872   return result;
 873 }
 874 \f
 875 /* Like fnmatch, but performs a case-insensitive match.  */
 876
 877 int
 878 fnmatch_nocase (const char *pattern, const char *string, int flags)
 879 {
 880 #ifdef FNM_CASEFOLD
 881   /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
 882      also present on *BSD platforms, and possibly elsewhere.  */
 883   return fnmatch (pattern, string, flags | FNM_CASEFOLD);
 884 #else
 885   /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
 886   char *patcopy = (char *) alloca (strlen (pattern) + 1);
 887   char *strcopy = (char *) alloca (strlen (string) + 1);
 888   char *p;
 889   for (p = patcopy; *pattern; pattern++, p++)
 890     *p = c_tolower (*pattern);
 891   *p = '\0';
 892   for (p = strcopy; *string; string++, p++)
 893     *p = c_tolower (*string);
 894   *p = '\0';
 895   return fnmatch (patcopy, strcopy, flags);
 896 #endif
 897 }
 898
 899 static bool in_acclist (const char *const *, const char *, bool);
 900
 901 /* Determine whether a file is acceptable to be followed, according to
 902    lists of patterns to accept/reject.  */
 903 bool
 904 acceptable (const char *s)
 905 {
 906   const char *p;
 907
 908   if (opt.output_document && strcmp (s, opt.output_document) == 0)
 909     return true;
 910
 911   if ((p = strrchr (s, '/')))
 912     s = p + 1;
 913
 914   if (opt.accepts)
 915     {
 916       if (opt.rejects)
 917         return (in_acclist ((const char *const *)opt.accepts, s, true)
 918                 && !in_acclist ((const char *const *)opt.rejects, s, true));
 919       else
 920         return in_acclist ((const char *const *)opt.accepts, s, true);
 921     }
 922   else if (opt.rejects)
 923     return !in_acclist ((const char *const *)opt.rejects, s, true);
 924
 925   return true;
 926 }
 927
 928 /* Determine whether an URL is acceptable to be followed, according to
 929    regex patterns to accept/reject.  */
 930 bool
 931 accept_url (const char *s)
 932 {
 933   if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
 934     return false;
 935   if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
 936     return false;
 937
 938   return true;
 939 }
 940
 941 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
 942    will return true if and only if D2 begins with `/something/' or is exactly
 943    '/something'.  */
 944 bool
 945 subdir_p (const char *d1, const char *d2)
 946 {
 947   if (*d1 == '\0')
 948     return true;
 949   if (!opt.ignore_case)
 950     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
 951       ;
 952   else
 953     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
 954       ;
 955
 956   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
 957 }
 958
 959 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
 960    first element that matches DIR, through wildcards or front comparison (as
 961    appropriate).  */
 962 static bool
 963 dir_matches_p (const char **dirlist, const char *dir)
 964 {
 965   const char **x;
 966   int (*matcher) (const char *, const char *, int)
 967     = opt.ignore_case ? fnmatch_nocase : fnmatch;
 968
 969   for (x = dirlist; *x; x++)
 970     {
 971       /* Remove leading '/' */
 972       const char *p = *x + (**x == '/');
 973       if (has_wildcards_p (p))
 974         {
 975           if (matcher (p, dir, FNM_PATHNAME) == 0)
 976             break;
 977         }
 978       else
 979         {
 980           if (subdir_p (p, dir))
 981             break;
 982         }
 983     }
 984
 985   return *x ? true : false;
 986 }
 987
 988 /* Returns whether DIRECTORY is acceptable for download, wrt the
 989    include/exclude lists.
 990
 991    The leading `/' is ignored in paths; relative and absolute paths
 992    may be freely intermixed.  */
 993
 994 bool
 995 accdir (const char *directory)
 996 {
 997   /* Remove starting '/'.  */
 998   if (*directory == '/')
 999     ++directory;
1000   if (opt.includes)
1001     {
1002       if (!dir_matches_p (opt.includes, directory))
1003         return false;
1004     }
1005   if (opt.excludes)
1006     {
1007       if (dir_matches_p (opt.excludes, directory))
1008         return false;
1009     }
1010   return true;
1011 }
1012
/* Return true if STRING ends with TAIL.  For instance:

   match_tail ("abc", "bc", false)  -> true
   match_tail ("abc", "ab", false)  -> false
   match_tail ("abc", "abc", false) -> true

   An empty TAIL matches every STRING.  If FOLD_CASE is true, the
   comparison is done with strcasecmp, i.e. case-insensitively.  */

bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  /* Keep the lengths as size_t and compare them directly; storing their
     difference in an int would narrow the value.  */
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);
  const char *start;

  if (tail_len > string_len)
    return false;  /* tail is longer than string.  */

  start = string + (string_len - tail_len);

  if (fold_case)
    return strcasecmp (start, tail) == 0;
  return strcmp (start, tail) == 0;
}
1034
1035 /* Checks whether string S matches each element of ACCEPTS.  A list
1036    element are matched either with fnmatch() or match_tail(),
1037    according to whether the element contains wildcards or not.
1038
1039    If the BACKWARD is false, don't do backward comparison -- just compare
1040    them normally.  */
1041 static bool
1042 in_acclist (const char *const *accepts, const char *s, bool backward)
1043 {
1044   for (; *accepts; accepts++)
1045     {
1046       if (has_wildcards_p (*accepts))
1047         {
1048           int res = opt.ignore_case
1049             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1050           /* fnmatch returns 0 if the pattern *does* match the string.  */
1051           if (res == 0)
1052             return true;
1053         }
1054       else
1055         {
1056           if (backward)
1057             {
1058               if (match_tail (s, *accepts, opt.ignore_case))
1059                 return true;
1060             }
1061           else
1062             {
1063               int cmp = opt.ignore_case
1064                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1065               if (cmp == 0)
1066                 return true;
1067             }
1068         }
1069     }
1070   return false;
1071 }
1072
/* Return a pointer to the suffix (file extension) of STR, i.e. to the
   character following the last '.', or NULL if STR has no '.' or if a
   '/' follows the last '.'.  The pointer refers into STR itself.
   Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  char *dot = strrchr (str, '.');

  if (dot == NULL)
    return NULL;

  /* A '/' after the last dot means the dot is part of a directory
     name, not of the file name.  */
  if (strchr (dot + 1, '/') != NULL)
    return NULL;

  return dot + 1;
}
1088
/* Return true if S contains at least one globbing wildcard character,
   that is one of `*', `?', `[' or `]'.  */

bool
has_wildcards_p (const char *s)
{
  static const char wildcards[] = "*?[]";

  return strpbrk (s, wildcards) != NULL;
}
1097
/* Return true if the suffix of FNAME (as determined by suffix()) looks
   like an HTML one.  The following suffixes, compared
   case-insensitively, are presumed to denote HTML files:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return (strcasecmp (ext, "html") == 0
          || strcasecmp (ext, "htm") == 0
          /* Skip one arbitrary character, then expect "html".  */
          || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}
1123
1124 /* Read FILE into memory.  A pointer to `struct file_memory' are
1125    returned; use struct element `content' to access file contents, and
1126    the element `length' to know the file length.  `content' is *not*
1127    zero-terminated, and you should *not* read or write beyond the [0,
1128    length) range of characters.
1129
1130    After you are done with the file contents, call wget_read_file_free to
1131    release the memory.
1132
1133    Depending on the operating system and the type of file that is
1134    being read, wget_read_file() either mmap's the file into memory, or
1135    reads the file into the core using read().
1136
1137    If file is named "-", fileno(stdin) is used for reading instead.
1138    If you want to read from a real file named "-", use "./-" instead.  */
1139
1140 struct file_memory *
1141 wget_read_file (const char *file)
1142 {
1143   int fd;
1144   struct file_memory *fm;
1145   long size;
1146   bool inhibit_close = false;
1147
1148   /* Some magic in the finest tradition of Perl and its kin: if FILE
1149      is "-", just use stdin.  */
1150   if (HYPHENP (file))
1151     {
1152       fd = fileno (stdin);
1153       inhibit_close = true;
1154       /* Note that we don't inhibit mmap() in this case.  If stdin is
1155          redirected from a regular file, mmap() will still work.  */
1156     }
1157   else
1158     fd = open (file, O_RDONLY);
1159   if (fd < 0)
1160     return NULL;
1161   fm = xnew (struct file_memory);
1162
1163 #ifdef HAVE_MMAP
1164   {
1165     struct_fstat buf;
1166     if (fstat (fd, &buf) < 0)
1167       goto mmap_lose;
1168     fm->length = buf.st_size;
1169     /* NOTE: As far as I know, the callers of this function never
1170        modify the file text.  Relying on this would enable us to
1171        specify PROT_READ and MAP_SHARED for a marginal gain in
1172        efficiency, but at some cost to generality.  */
1173     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1174                         MAP_PRIVATE, fd, 0);
1175     if (fm->content == (char *)MAP_FAILED)
1176       goto mmap_lose;
1177     if (!inhibit_close)
1178       close (fd);
1179
1180     fm->mmap_p = 1;
1181     return fm;
1182   }
1183
1184  mmap_lose:
1185   /* The most common reason why mmap() fails is that FD does not point
1186      to a plain file.  However, it's also possible that mmap() doesn't
1187      work for a particular type of file.  Therefore, whenever mmap()
1188      fails, we just fall back to the regular method.  */
1189 #endif /* HAVE_MMAP */
1190
1191   fm->length = 0;
1192   size = 512;                   /* number of bytes fm->contents can
1193                                    hold at any given time. */
1194   fm->content = xmalloc (size);
1195   while (1)
1196     {
1197       wgint nread;
1198       if (fm->length > size / 2)
1199         {
1200           /* #### I'm not sure whether the whole exponential-growth
1201              thing makes sense with kernel read.  On Linux at least,
1202              read() refuses to read more than 4K from a file at a
1203              single chunk anyway.  But other Unixes might optimize it
1204              better, and it doesn't *hurt* anything, so I'm leaving
1205              it.  */
1206
1207           /* Normally, we grow SIZE exponentially to make the number
1208              of calls to read() and realloc() logarithmic in relation
1209              to file size.  However, read() can read an amount of data
1210              smaller than requested, and it would be unreasonable to
1211              double SIZE every time *something* was read.  Therefore,
1212              we double SIZE only when the length exceeds half of the
1213              entire allocated size.  */
1214           size <<= 1;
1215           fm->content = xrealloc (fm->content, size);
1216         }
1217       nread = read (fd, fm->content + fm->length, size - fm->length);
1218       if (nread > 0)
1219         /* Successful read. */
1220         fm->length += nread;
1221       else if (nread < 0)
1222         /* Error. */
1223         goto lose;
1224       else
1225         /* EOF */
1226         break;
1227     }
1228   if (!inhibit_close)
1229     close (fd);
1230   if (size > fm->length && fm->length != 0)
1231     /* Due to exponential growth of fm->content, the allocated region
1232        might be much larger than what is actually needed.  */
1233     fm->content = xrealloc (fm->content, fm->length);
1234   fm->mmap_p = 0;
1235   return fm;
1236
1237  lose:
1238   if (!inhibit_close)
1239     close (fd);
1240   xfree (fm->content);
1241   xfree (fm);
1242   return NULL;
1243 }
1244
1245 /* Release the resources held by FM.  Specifically, this calls
1246    munmap() or xfree() on fm->content, depending whether mmap or
1247    malloc/read were used to read in the file.  It also frees the
1248    memory needed to hold the FM structure itself.  */
1249
1250 void
1251 wget_read_file_free (struct file_memory *fm)
1252 {
1253 #ifdef HAVE_MMAP
1254   if (fm->mmap_p)
1255     {
1256       munmap (fm->content, fm->length);
1257     }
1258   else
1259 #endif
1260     {
1261       xfree (fm->content);
1262     }
1263   xfree (fm);
1264 }
1265 \f
/* Free every string in the NULL-terminated vector VEC, then free VEC
   itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cur;

  if (vec == NULL)
    return;

  for (cur = vec; *cur != NULL; cur++)
    xfree (*cur);
  xfree (vec);
}
1279
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.  The function reallocates V1 and
   frees the array V2 (the strings V2 pointed to now belong to the
   result), so neither pointer may be used after the call.  If V1 is
   NULL, V2 is returned; if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  size_t n1, n2;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to append, just dispose of it.  */
      xfree (v2);
      return v1;
    }

  /* Count v1.  */
  for (n1 = 0; v1[n1]; n1++)
    ;
  /* Count v2.  */
  for (n2 = 0; v2[n2]; n2++)
    ;

  /* Reallocate v1 to hold both vectors plus the terminating NULL.  The
     element type is `char *', so size the array by `sizeof *v1'.  */
  v1 = xrealloc (v1, (n1 + n2 + 1) * sizeof *v1);
  /* Copy V2's elements including its terminating NULL.  */
  memcpy (v1 + n1, v2, (n2 + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1310
/* Append a copy of STR (made with xstrdup) to the NULL-terminated
   vector VEC.  VEC may be NULL, in which case a new vector is
   allocated.  The vector is reallocated, so callers must continue with
   the returned pointer.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* number of elements already in VEC */

  if (vec != NULL)
    while (vec[used] != NULL)
      used++;

  /* Make room for the existing elements, the new one and the NULL. */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));
  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1334 \f
1335 /* Sometimes it's useful to create "sets" of strings, i.e. special
1336    hash tables where you want to store strings as keys and merely
1337    query for their existence.  Here is a set of utility routines that
1338    makes that transparent.  */
1339
/* Add string S to the set HT.  If S is already present nothing
   happens; otherwise a copy of S (made with xstrdup) is stored as the
   key.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only insert elements that are not in the set yet, so that we never
     have to free() an old key and strdup() a new one.

     The value is always the literal "1": it is an arbitrary but clear
     marker, and since it is a string literal no per-entry memory is
     allocated for it.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1355
/* Synonym for hash_table_contains: query whether string S is a member
   of the string set HT.  The result of hash_table_contains (HT, S) is
   returned unchanged.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  return hash_table_contains (ht, s);
}
1363
1364 /* Convert the specified string set to array.  ARRAY should be large
1365    enough to hold hash_table_count(ht) char pointers.  */
1366
1367 void string_set_to_array (struct hash_table *ht, char **array)
1368 {
1369   hash_table_iterator iter;
1370   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1371     *array++ = iter.key;
1372 }
1373
1374 /* Free the string set.  This frees both the storage allocated for
1375    keys and the actual hash table.  (hash_table_destroy would only
1376    destroy the hash table.)  */
1377
1378 void
1379 string_set_free (struct hash_table *ht)
1380 {
1381   hash_table_iterator iter;
1382   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1383     xfree (iter.key);
1384   hash_table_destroy (ht);
1385 }
1386
1387 /* Utility function: simply call xfree() on all keys and values of HT.  */
1388
1389 void
1390 free_keys_and_values (struct hash_table *ht)
1391 {
1392   hash_table_iterator iter;
1393   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1394     {
1395       xfree (iter.key);
1396       xfree (iter.value);
1397     }
1398 }
1399 \f
/* Get digit grouping data for thousand separators by calling
   localeconv().  The separator string is stored in *SEP and the
   grouping info in *GROUPING; both are computed on the first call and
   cached for all later calls.

   In locales that don't set a thousand separator (such as the "C"
   locale), this forces it to be ",".  We are now only showing
   thousand separators in one place, so this shouldn't be a problem in
   practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *saved_sep;
  static const char *saved_grouping;
  static bool ready;

  if (!ready)
    {
      /* Get the grouping info from the locale. */
      const struct lconv *lc = localeconv ();
      const char *s = lc->thousands_sep;
      const char *g = lc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* We can't count column widths, so ensure that the separator
       * is single-byte only (let check below determine what byte). */
      if (s[0] != '\0' && s[1] != '\0')
        s = "";
#endif
      if (*s == '\0')
        {
          /* Many locales (such as "C" or "hr_HR") don't specify
             grouping, but we still want to use it for legibility.
             In those locales group by three digits and set the sep
             char to ',', unless that character is used for the
             decimal point, in which case set it to ".".  */
          s = (*lc->decimal_point == ',') ? "." : ",";
          g = "\x03";
        }

      saved_sep = s;
      saved_grouping = g;
      ready = true;
    }

  *sep = saved_sep;
  *grouping = saved_grouping;
}
1445
1446 /* Return a printed representation of N with thousand separators.
1447    This should respect locale settings, with the exception of the "C"
1448    locale which mandates no separator, but we use one anyway.
1449
1450    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1451    the separators because it's too non-portable, and it's hard to test
1452    for this feature at configure time.  Besides, it wouldn't display
1453    separators in the "C" locale, still used by many Unix users.  */
1454
1455 const char *
1456 with_thousand_seps (wgint n)
1457 {
1458   static char outbuf[48];
1459   char *p = outbuf + sizeof outbuf;
1460
1461   /* Info received from locale */
1462   const char *grouping, *sep;
1463   int seplen;
1464
1465   /* State information */
1466   int i = 0, groupsize;
1467   const char *atgroup;
1468
1469   bool negative = n < 0;
1470
1471   /* Initialize grouping data. */
1472   get_grouping_data (&sep, &grouping);
1473   seplen = strlen (sep);
1474   atgroup = grouping;
1475   groupsize = *atgroup++;
1476
1477   /* This would overflow on WGINT_MIN, but printing negative numbers
1478      is not an important goal of this fuinction.  */
1479   if (negative)
1480     n = -n;
1481
1482   /* Write the number into the buffer, backwards, inserting the
1483      separators as necessary.  */
1484   *--p = '\0';
1485   while (1)
1486     {
1487       *--p = n % 10 + '0';
1488       n /= 10;
1489       if (n == 0)
1490         break;
1491       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1492       if (++i == groupsize)
1493         {
1494           if (seplen == 1)
1495             *--p = *sep;
1496           else
1497             memcpy (p -= seplen, sep, seplen);
1498           i = 0;
1499           if (*atgroup)
1500             groupsize = *atgroup++;
1501         }
1502     }
1503   if (negative)
1504     *--p = '-';
1505
1506   return p;
1507 }
1508
1509 /* N, a byte quantity, is converted to a human-readable abberviated
1510    form a la sizes printed by `ls -lh'.  The result is written to a
1511    static buffer, a pointer to which is returned.
1512
1513    Unlike `with_thousand_seps', this approximates to the nearest unit.
1514    Quoting GNU libit: "Most people visually process strings of 3-4
1515    digits effectively, but longer strings of digits are more prone to
1516    misinterpretation.  Hence, converting to an abbreviated form
1517    usually improves readability."
1518
1519    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1520    original computer-related meaning of "powers of 1024".  We don't
1521    use the "*bibyte" names invented in 1998, and seldom used in
1522    practice.  Wikipedia's entry on "binary prefix" discusses this in
1523    some detail.  */
1524
1525 char *
1526 human_readable (HR_NUMTYPE n, const int acc, const int decimals)
1527 {
1528   /* These suffixes are compatible with those of GNU `ls -lh'. */
1529   static char powers[] =
1530     {
1531       'K',                      /* kilobyte, 2^10 bytes */
1532       'M',                      /* megabyte, 2^20 bytes */
1533       'G',                      /* gigabyte, 2^30 bytes */
1534       'T',                      /* terabyte, 2^40 bytes */
1535       'P',                      /* petabyte, 2^50 bytes */
1536       'E',                      /* exabyte,  2^60 bytes */
1537     };
1538   static char buf[8];
1539   size_t i;
1540
1541   /* If the quantity is smaller than 1K, just print it. */
1542   if (n < 1024)
1543     {
1544       snprintf (buf, sizeof (buf), "%d", (int) n);
1545       return buf;
1546     }
1547
1548   /* Loop over powers, dividing N with 1024 in each iteration.  This
1549      works unchanged for all sizes of wgint, while still avoiding
1550      non-portable `long double' arithmetic.  */
1551   for (i = 0; i < countof (powers); i++)
1552     {
1553       /* At each iteration N is greater than the *subsequent* power.
1554          That way N/1024.0 produces a decimal number in the units of
1555          *this* power.  */
1556       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1557         {
1558           double val = n / 1024.0;
1559           /* Print values smaller than the accuracy level (acc) with (decimal)
1560            * decimal digits, and others without any decimals.  */
1561           snprintf (buf, sizeof (buf), "%.*f%c",
1562                     val < acc ? decimals : 0, val, powers[i]);
1563           return buf;
1564         }
1565       n /= 1024;
1566     }
1567   return NULL;                  /* unreached */
1568 }
1569
1570 /* Count the digits in the provided number.  Used to allocate space
1571    when printing numbers.  */
1572
1573 int
1574 numdigit (wgint number)
1575 {
1576   int cnt = 1;
1577   if (number < 0)
1578     ++cnt;                      /* accomodate '-' */
1579   while ((number /= 10) != 0)
1580     ++cnt;
1581   return cnt;
1582 }
1583
/* PR stores the character for the digit n / MASK at *p and advances
   p.  It relies on variables named `n' and `p' being in scope where it
   is expanded, and it is only meaningful when n / MASK is a single
   decimal digit.  */
#define PR(mask) *p++ = n / (mask) + '0'

/* DIGITS_<D> is used to print a D-digit number and should be called
   with mask==10^(D-1).  It prints n/mask (the first digit), reducing
   n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
   Recursively this continues until DIGITS_1 is invoked.  The steps are
   chained with the comma operator, so each DIGITS_<D> expands to a
   single expression.  */

#define DIGITS_1(mask) PR (mask)
#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit wgints. */

#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)

/* Shorthand for casting to wgint: written as `(W)' in front of a
   constant to force wgint arithmetic.  */
#define W wgint
1616
1617 /* Print NUMBER to BUFFER in base 10.  This is equivalent to
1618    `sprintf(buffer, "%lld", (long long) number)', only typically much
1619    faster and portable to machines without long long.
1620
1621    The speedup may make a difference in programs that frequently
1622    convert numbers to strings.  Some implementations of sprintf,
1623    particularly the one in some versions of GNU libc, have been known
1624    to be quite slow when converting integers to strings.
1625
1626    Return the pointer to the location where the terminating zero was
1627    printed.  (Equivalent to calling buffer+strlen(buffer) after the
1628    function is done.)
1629
1630    BUFFER should be large enough to accept as many bytes as you expect
1631    the number to take up.  On machines with 64-bit wgints the maximum
1632    needed size is 24 bytes.  That includes the digits needed for the
1633    largest 64-bit number, the `-' sign in case it's negative, and the
1634    terminating '\0'.  */
1635
1636 char *
1637 number_to_string (char *buffer, wgint number)
1638 {
1639   char *p = buffer;
1640   wgint n = number;
1641
1642   int last_digit_char = 0;
1643
1644 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1645   /* We are running in a very strange environment.  Leave the correct
1646      printing to sprintf.  */
1647   p += sprintf (buf, "%j", (intmax_t) (n));
1648 #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1649
1650   if (n < 0)
1651     {
1652       if (n < -WGINT_MAX)
1653         {
1654           /* n = -n would overflow because -n would evaluate to a
1655              wgint value larger than WGINT_MAX.  Need to make n
1656              smaller and handle the last digit separately.  */
1657           int last_digit = n % 10;
1658           /* The sign of n%10 is implementation-defined. */
1659           if (last_digit < 0)
1660             last_digit_char = '0' - last_digit;
1661           else
1662             last_digit_char = '0' + last_digit;
1663           /* After n is made smaller, -n will not overflow. */
1664           n /= 10;
1665         }
1666
1667       *p++ = '-';
1668       n = -n;
1669     }
1670
1671   /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1672      way printing any N is fully open-coded without a loop or jump.
1673      (Also see description of DIGITS_*.)  */
1674
1675   if      (n < 10)                       DIGITS_1 (1);
1676   else if (n < 100)                      DIGITS_2 (10);
1677   else if (n < 1000)                     DIGITS_3 (100);
1678   else if (n < 10000)                    DIGITS_4 (1000);
1679   else if (n < 100000)                   DIGITS_5 (10000);
1680   else if (n < 1000000)                  DIGITS_6 (100000);
1681   else if (n < 10000000)                 DIGITS_7 (1000000);
1682   else if (n < 100000000)                DIGITS_8 (10000000);
1683   else if (n < 1000000000)               DIGITS_9 (100000000);
1684 #if SIZEOF_WGINT == 4
1685   /* wgint is 32 bits wide: no number has more than 10 digits. */
1686   else                                   DIGITS_10 (1000000000);
1687 #else
1688   /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1689      Constants are constructed by compile-time multiplication to avoid
1690      dealing with different notations for 64-bit constants
1691      (nL/nLL/nI64, depending on the compiler and architecture).  */
1692   else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1693   else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1694   else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1695   else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1696   else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1697   else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1698   else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1699   else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1700   else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1701   else                                   DIGITS_19 (1000000000*(W)1000000000);
1702 #endif
1703
1704   if (last_digit_char)
1705     *p++ = last_digit_char;
1706
1707   *p = '\0';
1708 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1709
1710   return p;
1711 }
1712
/* The digit-printing helper macros defined before number_to_string are
   removed again here, so that short names such as PR and W do not
   remain defined for the rest of the file.
   NOTE(review): SPRINTF_WGINT is not defined anywhere in the visible
   part of this file; its #undef looks like a leftover -- confirm.  */
#undef PR
#undef W
#undef SPRINTF_WGINT
#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1735
1736 #define RING_SIZE 3
1737
1738 /* Print NUMBER to a statically allocated string and return a pointer
1739    to the printed representation.
1740
1741    This function is intended to be used in conjunction with printf.
1742    It is hard to portably print wgint values:
1743     a) you cannot use printf("%ld", number) because wgint can be long
1744        long on 32-bit machines with LFS.
1745     b) you cannot use printf("%lld", number) because NUMBER could be
1746        long on 32-bit machines without LFS, or on 64-bit machines,
1747        which do not require LFS.  Also, Windows doesn't support %lld.
1748     c) you cannot use printf("%j", (int_max_t) number) because not all
1749        versions of printf support "%j", the most notable being the one
1750        on Windows.
1751     d) you cannot #define WGINT_FMT to the appropriate format and use
1752        printf(WGINT_FMT, number) because that would break translations
1753        for user-visible messages, such as printf("Downloaded: %d
1754        bytes\n", number).
1755
1756    What you should use instead is printf("%s", number_to_static_string
1757    (number)).
1758
1759    CAVEAT: since the function returns pointers to static data, you
1760    must be careful to copy its result before calling it again.
1761    However, to make it more useful with printf, the function maintains
1762    an internal ring of static buffers to return.  That way things like
1763    printf("%s %s", number_to_static_string (num1),
1764    number_to_static_string (num2)) work as expected.  Three buffers
1765    are currently used, which means that "%s %s %s" will work, but "%s
1766    %s %s %s" won't.  If you need to print more than three wgints,
1767    bump the RING_SIZE (or rethink your message.)  */
1768
1769 char *
1770 number_to_static_string (wgint number)
1771 {
1772   static char ring[RING_SIZE][24];
1773   static int ringpos;
1774   char *buf = ring[ringpos];
1775   number_to_string (buf, number);
1776   ringpos = (ringpos + 1) % RING_SIZE;
1777   return buf;
1778 }
1779
1780 /* Converts the byte to bits format if --report-bps option is enabled
1781  */
1782 wgint
1783 convert_to_bits (wgint num)
1784 {
1785   if (opt.report_bps)
1786     return num * 8;
1787   return num;
1788 }
1789
1790 \f
1791 /* Determine the width of the terminal we're running on.  If that's
1792    not possible, return 0.  */
1793
1794 int
1795 determine_screen_width (void)
1796 {
1797   /* If there's a way to get the terminal size using POSIX
1798      tcgetattr(), somebody please tell me.  */
1799 #ifdef TIOCGWINSZ
1800   int fd;
1801   struct winsize wsz;
1802
1803   if (opt.lfilename != NULL)
1804     return 0;
1805
1806   fd = fileno (stderr);
1807   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1808     return 0;                   /* most likely ENOTTY */
1809
1810   return wsz.ws_col;
1811 #elif defined(WINDOWS)
1812   CONSOLE_SCREEN_BUFFER_INFO csbi;
1813   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1814     return 0;
1815   return csbi.dwSize.X;
1816 #else  /* neither TIOCGWINSZ nor WINDOWS */
1817   return 0;
1818 #endif /* neither TIOCGWINSZ nor WINDOWS */
1819 }
1820 \f
/* Nonzero once the random number generator (either rand or
   [dl]rand48) has been seeded.  */
static int rnd_seeded;

/* Return a random number between 0 and MAX-1, inclusive.  MAX should
   be positive.

   If the system does not support lrand48 and MAX is greater than the
   value of RAND_MAX+1 on the system, not every value below MAX can be
   produced.  This may be fixed in a future release.
   The random number generator is seeded automatically, from the
   current time and the process id, the first time it is called.

   This uses lrand48 where available, rand elsewhere.  DO NOT use it
   for cryptography.  It is only meant to be used in situations where
   quality of the random numbers returned doesn't really matter.  */

int
random_number (int max)
{
#ifdef HAVE_DRAND48
  if (!rnd_seeded)
    {
      long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */

  double bounded;
  int raw;

  if (!rnd_seeded)
    {
      unsigned seed = (unsigned) time (NULL) ^ (unsigned) getpid ();
      srand (seed);
      rnd_seeded = 1;
    }
  raw = rand ();

  /* Like rand() % max, but uses the high-order bits for better
     randomness on architectures where rand() is implemented using a
     simple congruential generator: RAW is scaled from the range
     [0, RAND_MAX] down to [0, MAX).  */
  bounded = (double) max * raw / (RAND_MAX + 1.0);
  return (int) bounded;

#endif /* not HAVE_DRAND48 */
}
1867
1868 /* Return a random uniformly distributed floating point number in the
1869    [0, 1) range.  Uses drand48 where available, and a really lame
1870    kludge elsewhere.  */
1871
double
random_float (void)
{
#ifndef HAVE_DRAND48
  /* Kludge: assemble the fraction from four random base-10000
     "digits", the first weighted 1/10000, the next 1/10000^2, and so
     on.  */
  double sum = 0.0;
  double divisor = 1.0;
  int digit;

  for (digit = 0; digit < 4; digit++)
    {
      divisor *= 10000.0;
      sum += random_number (10000) / divisor;
    }
  return sum;
#else  /* HAVE_DRAND48 */
  /* Seed the *rand48 family on first use, then let drand48 do the
     work.  */
  if (rnd_seeded == 0)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return drand48 ();
#endif /* HAVE_DRAND48 */
}
1889 \f
1890 /* Implementation of run_with_timeout, a generic timeout-forcing
1891    routine for systems with Unix-like signal handling.  */
1892
1893 #ifdef USE_SIGNAL_TIMEOUT
1894 # ifdef HAVE_SIGSETJMP
1895 #  define SETJMP(env) sigsetjmp (env, 1)
1896
1897 static sigjmp_buf run_with_timeout_env;
1898
1899 static void
1900 abort_run_with_timeout (int sig)
1901 {
1902   assert (sig == SIGALRM);
1903   siglongjmp (run_with_timeout_env, -1);
1904 }
1905 # else /* not HAVE_SIGSETJMP */
1906 #  define SETJMP(env) setjmp (env)
1907
1908 static jmp_buf run_with_timeout_env;
1909
1910 static void
1911 abort_run_with_timeout (int sig)
1912 {
1913   assert (sig == SIGALRM);
1914   /* We don't have siglongjmp to preserve the set of blocked signals;
1915      if we longjumped out of the handler at this point, SIGALRM would
1916      remain blocked.  We must unblock it manually. */
1917   sigset_t set;
1918   sigemptyset (&set);
1919   sigaddset (&set, SIGALRM);
1920   sigprocmask (SIG_BLOCK, &set, NULL);
1921
1922   /* Now it's safe to longjump. */
1923   longjmp (run_with_timeout_env, -1);
1924 }
1925 # endif /* not HAVE_SIGSETJMP */
1926
1927 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1928    setitimer where available, alarm otherwise.
1929
1930    TIMEOUT should be non-zero.  If the timeout value is so small that
1931    it would be rounded to zero, it is rounded to the least legal value
1932    instead (1us for setitimer, 1s for alarm).  That ensures that
1933    SIGALRM will be delivered in all cases.  */
1934
static void
alarm_set (double timeout)
{
#ifndef ITIMER_REAL
  /* Only the old alarm() interface is available; it counts whole
     seconds.  */
  int whole_secs = (int) timeout;

  /* alarm(0) means "never deliver the alarm", i.e. "wait forever",
     which is not what someone asking for a 0.5s timeout expects, so
     timeouts that truncate to zero are rounded up to one second.  */
  alarm (whole_secs != 0 ? whole_secs : 1);
#else  /* ITIMER_REAL */
  /* The interval-timer interface is available: split TIMEOUT into
     whole seconds and microseconds for a one-shot timer.  */
  struct itimerval timer;

  xzero (timer);
  timer.it_value.tv_sec = (long) timeout;
  timer.it_value.tv_usec = 1000000 * (timeout - (long) timeout);

  /* An all-zero value would mean "wait forever", so make sure we
     wait for at least the minimum interval.  */
  if (!timer.it_value.tv_sec && !timer.it_value.tv_usec)
    timer.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &timer, NULL);
#endif /* ITIMER_REAL */
}
1961
1962 /* Cancel the alarm set with alarm_set. */
1963
static void
alarm_cancel (void)
{
#ifndef ITIMER_REAL
  /* alarm() interface: a zero argument cancels the pending alarm.  */
  alarm (0);
#else  /* ITIMER_REAL */
  /* Interval-timer interface: installing an all-zero timer value
     disarms the timer.  */
  struct itimerval zero_timer;

  xzero (zero_timer);
  setitimer (ITIMER_REAL, &zero_timer, NULL);
#endif /* ITIMER_REAL */
}
1975
1976 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1977    seconds.  Returns true if the function was interrupted with a
1978    timeout, false otherwise.
1979
1980    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1981    using setitimer() or alarm().  The timeout is enforced by
1982    longjumping out of the SIGALRM handler.  This has several
1983    advantages compared to the traditional approach of relying on
1984    signals causing system calls to exit with EINTR:
1985
1986      * The callback function is *forcibly* interrupted after the
1987        timeout expires, (almost) regardless of what it was doing and
1988        whether it was in a syscall.  For example, a calculation that
1989        takes a long time is interrupted as reliably as an IO
1990        operation.
1991
1992      * It works with both SYSV and BSD signals because it doesn't
1993        depend on the default setting of SA_RESTART.
1994
1995      * It doesn't require special handler setup beyond a simple call
1996        to signal().  (It does use sigsetjmp/siglongjmp, but they're
1997        optional.)
1998
1999    The only downside is that, if FUN allocates internal resources that
2000    are normally freed prior to exit from the functions, they will be
2001    lost in case of timeout.  */
2002
2003 bool
2004 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2005 {
2006   int saved_errno;
2007
2008   if (timeout == 0)
2009     {
2010       fun (arg);
2011       return false;
2012     }
2013
2014   signal (SIGALRM, abort_run_with_timeout);
2015   if (SETJMP (run_with_timeout_env) != 0)
2016     {
2017       /* Longjumped out of FUN with a timeout. */
2018       signal (SIGALRM, SIG_DFL);
2019       return true;
2020     }
2021   alarm_set (timeout);
2022   fun (arg);
2023
2024   /* Preserve errno in case alarm() or signal() modifies it. */
2025   saved_errno = errno;
2026   alarm_cancel ();
2027   signal (SIGALRM, SIG_DFL);
2028   errno = saved_errno;
2029
2030   return false;
2031 }
2032
2033 #else  /* not USE_SIGNAL_TIMEOUT */
2034
2035 #ifndef WINDOWS
2036 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
2037    define it under Windows, because Windows has its own version of
2038    run_with_timeout that uses threads.  */
2039
bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  /* Stub: TIMEOUT is ignored here, FUN always runs to completion and
     no timeout is ever reported.  */
  (void) timeout;
  (*fun) (arg);
  return false;
}
2046 #endif /* not WINDOWS */
2047 #endif /* not USE_SIGNAL_TIMEOUT */
2048 \f
2049 #ifndef WINDOWS
2050
2051 /* Sleep the specified amount of seconds.  On machines without
2052    nanosleep(), this may sleep shorter if interrupted by signals.  */
2053
2054 void
2055 xsleep (double seconds)
2056 {
2057 #ifdef HAVE_NANOSLEEP
2058   /* nanosleep is the preferred interface because it offers high
2059      accuracy and, more importantly, because it allows us to reliably
2060      restart receiving a signal such as SIGWINCH.  (There was an
2061      actual Debian bug report about --limit-rate malfunctioning while
2062      the terminal was being resized.)  */
2063   struct timespec sleep, remaining;
2064   sleep.tv_sec = (long) seconds;
2065   sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
2066   while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
2067     /* If nanosleep has been interrupted by a signal, adjust the
2068        sleeping period and return to sleep.  */
2069     sleep = remaining;
2070 #elif defined(HAVE_USLEEP)
2071   /* If usleep is available, use it in preference to select.  */
2072   if (seconds >= 1)
2073     {
2074       /* On some systems, usleep cannot handle values larger than
2075          1,000,000.  If the period is larger than that, use sleep
2076          first, then add usleep for subsecond accuracy.  */
2077       sleep (seconds);
2078       seconds -= (long) seconds;
2079     }
2080   usleep (seconds * 1000000);
2081 #else /* fall back select */
2082   /* Note that, although Windows supports select, it can't be used to
2083      implement sleeping because Winsock's select doesn't implement
2084      timeout when it is passed NULL pointers for all fd sets.  (But it
2085      does under Cygwin, which implements Unix-compatible select.)  */
2086   struct timeval sleep;
2087   sleep.tv_sec = (long) seconds;
2088   sleep.tv_usec = 1000000 * (seconds - (long) seconds);
2089   select (0, NULL, NULL, NULL, &sleep);
2090   /* If select returns -1 and errno is EINTR, it means we were
2091      interrupted by a signal.  But without knowing how long we've
2092      actually slept, we can't return to sleep.  Using gettimeofday to
2093      track sleeps is slow and unreliable due to clock skew.  */
2094 #endif
2095 }
2096
2097 #endif /* not WINDOWS */
2098
2099 /* Encode the octets in DATA of length LENGTH to base64 format,
2100    storing the result to DEST.  The output will be zero-terminated,
2101    and must point to a writable buffer of at least
2102    1+BASE64_LENGTH(length) bytes.  The function returns the length of
2103    the resulting base64 data, not counting the terminating zero.
2104
2105    This implementation does not emit newlines after 76 characters of
2106    base64 data.  */
2107
size_t
base64_encode (const void *data, size_t length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  /* Number of complete 3-byte groups.  Counting groups, rather than
     comparing S against DATA + LENGTH - 2, avoids forming a pointer
     that lies before the start of DATA when LENGTH is less than 2.  */
  size_t groups = length / 3;
  char *p = dest;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; groups > 0; groups--, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two leftover bytes, if any, and pad the final
     quadruplet with '='...  */
  switch (length % 3)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* LENGTH is a multiple of 3: nothing left over, no padding.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (size_t) (p - dest);
}
2155
2156 /* Store in C the next non-whitespace character from the string, or \0
2157    when end of string is reached.  */
2158 #define NEXT_CHAR(c, p) do {                    \
2159   c = (unsigned char) *p++;                     \
2160 } while (c_isspace (c))
2161
2162 #define IS_ASCII(c) (((c) & 0x80) == 0)
2163
/* Decode BASE64 (a null-terminated string) into the memory pointed
   to by DEST.  DEST is assumed to be large enough to accommodate the
   decoded data, which is guaranteed to be no more than
   3/4*strlen(base64) bytes.  Whitespace in the input is skipped.

   Since DEST is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the data
   written to DEST.  -1 is returned in case of error caused by
   malformed base64 input.

   This function originates from Free Recode.  */

ssize_t
base64_decode (const char *base64, void *dest)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')

  const char *in = base64;      /* read cursor in the encoded text */
  char *out = dest;             /* write cursor in the decoded data */

  /* Each iteration consumes one quadruplet of input characters and
     emits up to three decoded bytes.  */
  for (;;)
    {
      unsigned char ch;
      unsigned long group;      /* the 24 bits collected so far */

      /* First character: the end of the string is legal only here.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        break;
      if (ch == '=' || !IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */
      group = BASE64_CHAR_TO_VALUE (ch) << 18;

      /* Second character: must be a data character; it completes the
         first output byte.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0' || ch == '=' || !IS_BASE64 (ch))
        return -1;              /* premature EOF or illegal char */
      group |= BASE64_CHAR_TO_VALUE (ch) << 12;
      *out++ = group >> 16;

      /* Third character: either data or the first of two `='.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0' || !IS_BASE64 (ch))
        return -1;              /* premature EOF or illegal char */

      if (ch == '=')
        {
          /* A second `=' must follow; the end of the string fails
             this test as well.  */
          NEXT_CHAR (ch, in);
          if (ch != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }

      group |= BASE64_CHAR_TO_VALUE (ch) << 6;
      *out++ = (group >> 8) & 0xff;

      /* Fourth character: either data or a single padding `='.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        return -1;              /* premature EOF while decoding base64 */
      if (ch == '=')
        continue;
      if (!IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */

      group |= BASE64_CHAR_TO_VALUE (ch);
      *out++ = group & 0xff;
    }
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE

  return out - (char *) dest;
}
2262
2263 #ifdef HAVE_LIBPCRE
2264 /* Compiles the PCRE regex. */
2265 void *
2266 compile_pcre_regex (const char *str)
2267 {
2268   const char *errbuf;
2269   int erroffset;
2270   pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
2271   if (! regex)
2272     {
2273       fprintf (stderr, _("Invalid regular expression %s, %s\n"),
2274                quote (str), errbuf);
2275       return false;
2276     }
2277   return regex;
2278 }
2279 #endif
2280
2281 /* Compiles the POSIX regex. */
void *
compile_posix_regex (const char *str)
{
  /* The compiled pattern is heap-allocated and handed to the caller
     on success.  */
  regex_t *regex = xmalloc (sizeof *regex);
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* Ask regerror for the required buffer size first, then fetch
         the message itself.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* Nothing was compiled, so release the regex_t storage too
         instead of leaking it.  */
      xfree (regex);
      return NULL;
    }

  return regex;
}
2300
2301 #ifdef HAVE_LIBPCRE
2302 #define OVECCOUNT 30
2303 /* Matches a PCRE regex.  */
2304 bool
2305 match_pcre_regex (const void *regex, const char *str)
2306 {
2307   size_t l = strlen (str);
2308   int ovector[OVECCOUNT];
2309
2310   int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT);
2311   if (rc == PCRE_ERROR_NOMATCH)
2312     return false;
2313   else if (rc < 0)
2314     {
2315       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2316                  quote (str), rc);
2317       return false;
2318     }
2319   else
2320     return true;
2321 }
2322 #undef OVECCOUNT
2323 #endif
2324
2325 /* Matches a POSIX regex.  */
2326 bool
2327 match_posix_regex (const void *regex, const char *str)
2328 {
2329   int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
2330   if (rc == REG_NOMATCH)
2331     return false;
2332   else if (rc == 0)
2333     return true;
2334   else
2335     {
2336       size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
2337       char *errbuf = xmalloc (errbuf_size);
2338       regerror (rc, opt.acceptregex, errbuf, errbuf_size);
2339       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2340                  quote (str), rc);
2341       xfree (errbuf);
2342       return false;
2343     }
2344 }
2345
2346 #undef IS_ASCII
2347 #undef NEXT_CHAR
2348 \f
2349 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2350    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2351
/* Sort the elements FROM..TO (inclusive indices) of the array BASE,
   whose elements are SIZE bytes each, in the order defined by CMPFUN.
   TEMP is scratch storage that must be able to hold at least TO + 1
   elements.  Elements that compare equal keep their relative order.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Midpoint computed as an offset from FROM so that the sum of
         the two indices can never overflow.  */
      size_t mid = from + (to - from) / 2;

      /* Sort both halves independently...  */
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);

      /* ...then merge them into TEMP.  On ties the element from the
         left half is taken first, which is what makes the sort
         stable.  */
      i = from;
      j = mid + 1;
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);

      /* Copy whatever is left in the half that was not exhausted.  */
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);

      /* Move the merged run back into place.  */
      for (k = from; k <= to; k++)
        memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage with
   alloca.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* Arrays of fewer than two elements are already sorted.  The test
     must be on NMEMB: with NMEMB == 0 the NMEMB - 1 below would wrap
     around, and elements of SIZE 1 need sorting like any others.  */
  if (nmemb > 1 && size > 0)
    {
      /* Scratch space for exactly one copy of the array.  */
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2394 \f
2395 /* Print a decimal number.  If it is equal to or larger than ten, the
2396    number is rounded.  Otherwise it is printed with one significant
2397    digit without trailing zeros and with no more than three fractional
2398    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2399    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2400
2401    This is useful for displaying durations because it provides
2402    order-of-magnitude information without unnecessary clutter --
2403    long-running downloads are shown without the fractional part, and
2404    short ones still retain one significant digit.  */
2405
const char *
print_decimal (double number)
{
  /* The result lives in a static buffer that is overwritten by every
     call.  */
  static char buf[32];
  double magnitude = number;

  /* The format is chosen by magnitude; the sign is printed as is.  */
  if (!(number >= 0))
    magnitude = -number;

  if (magnitude >= 9.95)
    {
      /* Cut off at 9.95 because the %.1f below would round 9.96 to
         "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
      snprintf (buf, sizeof buf, "%.0f", number);
      return buf;
    }

  if (magnitude >= 0.95)
    {
      /* One fractional digit.  */
      snprintf (buf, sizeof buf, "%.1f", number);
      return buf;
    }

  if (magnitude >= 0.001)
    {
      /* One significant digit.  */
      snprintf (buf, sizeof buf, "%.1g", number);
      return buf;
    }

  if (magnitude >= 0.0005)
    {
      /* round [0.0005, 0.001) to 0.001 */
      snprintf (buf, sizeof buf, "%.3f", number);
      return buf;
    }

  /* print numbers close to 0 as 0, not 0.000 */
  strcpy (buf, "0");
  return buf;
}
2429
2430 /* Get the maximum name length for the given path. */
2431 /* Return 0 if length is unknown. */
2432 long
2433 get_max_length (const char *path, int length, int name)
2434 {
2435   long ret;
2436   char *p, *d;
2437
2438   /* Make a copy of the path that we can modify. */
2439   p = path ? strdupdelim (path, path + length) : strdup ("");
2440
2441   for (;;)
2442     {
2443       errno = 0;
2444       /* For an empty path query the current directory. */
2445 #if HAVE_PATHCONF
2446       ret = pathconf (*p ? p : ".", name);
2447       if (!(ret < 0 && errno == ENOENT))
2448         break;
2449 #else
2450       ret = PATH_MAX;
2451 #endif
2452
2453       /* The path does not exist yet, but may be created. */
2454       /* Already at current or root directory, give up. */
2455       if (!*p || strcmp (p, "/") == 0)
2456         break;
2457
2458       /* Remove one directory level and try again. */
2459       d = strrchr (p, '/');
2460       if (d == p)
2461         p[1] = '\0';  /* check root directory */
2462       else if (d)
2463         *d = '\0';  /* remove last directory part */
2464       else
2465         *p = '\0';  /* check current directory */
2466     }
2467
2468   xfree (p);
2469
2470   if (ret < 0)
2471     {
2472       /* pathconf() has a message for us. */
2473       if (errno != 0)
2474           perror ("pathconf");
2475
2476       /* If (errno == 0) then there is no max length.
2477          Even on error return 0 so the caller can continue. */
2478       return 0;
2479     }
2480
2481   return ret;
2482 }
2483
2484 #ifdef TESTING
2485
const char *
test_subdir_p(void)
{
  /* Each case holds the two directory arguments passed to subdir_p,
     in order, and the result it is expected to return.  */
  static const struct {
    const char *d1;
    const char *d2;
    bool result;
  } test_array[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  unsigned idx = 0;

  while (idx < countof(test_array))
    {
      bool expected = test_array[idx].result;
      bool actual = subdir_p (test_array[idx].d1, test_array[idx].d2);

      mu_assert ("test_subdir_p: wrong result", actual == expected);
      ++idx;
    }

  /* NULL is returned once every case has been checked.  */
  return NULL;
}
2510
2511 const char *
2512 test_dir_matches_p(void)
2513 {
2514   static struct {
2515     const char *dirlist[3];
2516     const char *dir;
2517     bool result;
2518   } test_array[] = {
2519     { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2520     { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2521     { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2522     { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2523     { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
2524     { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2525     { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
2526     { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
2527     { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
2528     { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
2529     { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
2530     { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
2531     { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
2532     { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
2533     { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
2534   };
2535   unsigned i;
2536
2537   for (i = 0; i < countof(test_array); ++i)
2538     {
2539       bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2540
2541       mu_assert ("test_dir_matches_p: wrong result",
2542                  res == test_array[i].result);
2543     }
2544
2545   return NULL;
2546 }
2547
2548 #endif /* TESTING */
2549