[svn] Use sets/hash-tables instead of "slists". Remove slist implementation from

[wget] / src / utils.c
diff --git a/src/utils.c b/src/utils.c

index 5b577587f22bb6668d4c8ff596c4d0008284dc41..1d2c5c539eba1d00dd0f493c2212c0bffa3853bd 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,5 +1,5 @@
  /* Various utility functions.
-   Copyright (C) 2003 Free Software Foundation, Inc.
+   Copyright (C) 2005 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
@@ -46,7 +46,9 @@ so, delete this exception statement from your version.  */
  #ifdef HAVE_PWD_H
  # include <pwd.h>
  #endif
-#include <limits.h>
+#ifdef HAVE_LIMITS_H
+# include <limits.h>
+#endif
  #ifdef HAVE_UTIME_H
  # include <utime.h>
  #endif
@@ -59,6 +61,11 @@ so, delete this exception statement from your version.  */
  #endif
  #include <fcntl.h>
  #include <assert.h>
+#ifdef WGET_USE_STDARG
+# include <stdarg.h>
+#else
+# include <varargs.h>
+#endif
  
  /* For TIOCGWINSZ and friends: */
  #ifdef HAVE_SYS_IOCTL_H
@@ -113,19 +120,6 @@ xstrdup_lower (const char *s)
    return copy;
  }
  
-/* Return a count of how many times CHR occurs in STRING. */
-
-int
-count_char (const char *string, char chr)
-{
-  const char *p;
-  int count = 0;
-  for (p = string; *p; p++)
-    if (*p == chr)
-      ++count;
-  return count;
-}
-
  /* Copy the string formed by two pointers (one on the beginning, other
     on the char after the last char) to a new, malloc-ed location.
     0-terminate it.  */
@@ -174,23 +168,111 @@ sepstring (const char *s)
    return res;
  }
  \f
+#ifdef WGET_USE_STDARG
+# define VA_START(args, arg1) va_start (args, arg1)
+#else
+# define VA_START(args, ignored) va_start (args)
+#endif
+
+/* Like sprintf, but allocates a string of sufficient size with malloc
+   and returns it.  GNU libc has a similar function named asprintf,
+   which requires the pointer to the string to be passed.  */
+
+char *
+aprintf (const char *fmt, ...)
+{
+  /* This function is implemented using vsnprintf, which we provide
+     for the systems that don't have it.  Therefore, it should be 100%
+     portable.  */
+
+  int size = 32;
+  char *str = xmalloc (size);
+
+  while (1)
+    {
+      int n;
+      va_list args;
+
+      /* See log_vprintf_internal for explanation why it's OK to rely
+        on the return value of vsnprintf.  */
+
+      VA_START (args, fmt);
+      n = vsnprintf (str, size, fmt, args);
+      va_end (args);
+
+      /* If the printing worked, return the string. */
+      if (n > -1 && n < size)
+       return str;
+
+      /* Else try again with a larger buffer. */
+      if (n > -1)              /* C99 */
+       size = n + 1;           /* precisely what is needed */
+      else
+       size <<= 1;             /* twice the old size */
+      str = xrealloc (str, size);
+    }
+  return NULL;                 /* unreached */
+}
+
+/* Concatenate the NULL-terminated list of string arguments into
+   freshly allocated space.  */
+
+char *
+concat_strings (const char *str0, ...)
+{
+  va_list args;
+  int saved_lengths[5];                /* inspired by Apache's apr_pstrcat */
+  char *ret, *p;
+
+  const char *next_str;
+  int total_length = 0;
+  int argcount;
+
+  /* Calculate the length of and allocate the resulting string. */
+
+  argcount = 0;
+  VA_START (args, str0);
+  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
+    {
+      int len = strlen (next_str);
+      if (argcount < countof (saved_lengths))
+       saved_lengths[argcount++] = len;
+      total_length += len;
+    }
+  va_end (args);
+  p = ret = xmalloc (total_length + 1);
+
+  /* Copy the strings into the allocated space. */
+
+  argcount = 0;
+  VA_START (args, str0);
+  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
+    {
+      int len;
+      if (argcount < countof (saved_lengths))
+       len = saved_lengths[argcount++];
+      else
+       len = strlen (next_str);
+      memcpy (p, next_str, len);
+      p += len;
+    }
+  va_end (args);
+  *p = '\0';
+
+  return ret;
+}
+\f
  /* Return pointer to a static char[] buffer in which zero-terminated
     string-representation of TM (in form hh:mm:ss) is printed.
  
-   If TM is non-NULL, the current time-in-seconds will be stored
-   there.
-
-   (#### This is misleading: one would expect TM would be used instead
-   of the current time in that case.  This design was probably
-   influenced by the design time(2), and should be changed at some
-   points.  No callers use non-NULL TM anyway.)  */
+   If TM is NULL, the current time will be used.  */
  
  char *
  time_str (time_t *tm)
  {
    static char output[15];
    struct tm *ptm;
-  time_t secs = time (tm);
+  time_t secs = tm ? *tm : time (NULL);
  
    if (secs == -1)
      {
@@ -211,7 +293,7 @@ datetime_str (time_t *tm)
  {
    static char output[20];      /* "YYYY-MM-DD hh:mm:ss" + \0 */
    struct tm *ptm;
-  time_t secs = time (tm);
+  time_t secs = tm ? *tm : time (NULL);
  
    if (secs == -1)
      {
@@ -236,12 +318,21 @@ fork_to_background (void)
  {
    pid_t pid;
    /* Whether we arrange our own version of opt.lfilename here.  */
-  int changedp = 0;
+  int logfile_changed = 0;
  
    if (!opt.lfilename)
      {
-      opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
-      changedp = 1;
+      /* We must create the file immediately to avoid either a race
+        condition (which arises from using unique_name and failing to
+        use fopen_excl) or lying to the user about the log file name
+        (which arises from using unique_name, printing the name, and
+        using fopen_excl later on.)  */
+      FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, 0, &opt.lfilename);
+      if (new_log_fp)
+       {
+         logfile_changed = 1;
+         fclose (new_log_fp);
+       }
      }
    pid = fork ();
    if (pid < 0)
@@ -254,7 +345,7 @@ fork_to_background (void)
      {
        /* parent, no error */
        printf (_("Continuing in background, pid %d.\n"), (int)pid);
-      if (changedp)
+      if (logfile_changed)
         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
        exit (0);                        /* #### should we use _exit()? */
      }
@@ -290,7 +381,7 @@ int
  remove_link (const char *file)
  {
    int err = 0;
-  struct stat st;
+  struct_stat st;
  
    if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
      {
@@ -316,7 +407,7 @@ file_exists_p (const char *filename)
  #ifdef HAVE_ACCESS
    return access (filename, F_OK) >= 0;
  #else
-  struct stat buf;
+  struct_stat buf;
    return stat (filename, &buf) >= 0;
  #endif
  }
@@ -326,7 +417,7 @@ file_exists_p (const char *filename)
  int
  file_non_directory_p (const char *path)
  {
-  struct stat buf;
+  struct_stat buf;
    /* Use lstat() rather than stat() so that symbolic links pointing to
       directories can be identified correctly.  */
    if (lstat (path, &buf) != 0)
@@ -336,20 +427,28 @@ file_non_directory_p (const char *path)
  
  /* Return the size of file named by FILENAME, or -1 if it cannot be
     opened or seeked into. */
-long
+wgint
  file_size (const char *filename)
  {
-  long size;
+#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
+  wgint size;
    /* We use fseek rather than stat to determine the file size because
-     that way we can also verify whether the file is readable.
-     Inspired by the POST patch by Arnaud Wylie.  */
+     that way we can also verify that the file is readable without
+     explicitly checking for permissions.  Inspired by the POST patch
+     by Arnaud Wylie.  */
    FILE *fp = fopen (filename, "rb");
    if (!fp)
      return -1;
-  fseek (fp, 0, SEEK_END);
-  size = ftell (fp);
+  fseeko (fp, 0, SEEK_END);
+  size = ftello (fp);
    fclose (fp);
    return size;
+#else
+  struct_stat st;
+  if (stat (filename, &st) < 0)
+    return -1;
+  return st.st_size;
+#endif
  }
  
  /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
@@ -384,7 +483,7 @@ unique_name_1 (const char *prefix)
     exist at the point in time when the function was called.
     Therefore, where security matters, don't rely that the file created
     by this function exists until you open it with O_EXCL or
-   something.
+   equivalent.
  
     If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
     string.  Otherwise, it may return FILE if the file doesn't exist
@@ -402,6 +501,74 @@ unique_name (const char *file, int allow_passthrough)
       and return it.  */
    return unique_name_1 (file);
  }
+
+/* Create a file based on NAME, except without overwriting an existing
+   file with that name.  Providing O_EXCL is correctly implemented,
+   this function does not have the race condition associated with
+   opening the file returned by unique_name.  */
+
+FILE *
+unique_create (const char *name, int binary, char **opened_name)
+{
+  /* unique file name, based on NAME */
+  char *uname = unique_name (name, 0);
+  FILE *fp;
+  while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
+    {
+      xfree (uname);
+      uname = unique_name (name, 0);
+    }
+  if (opened_name && fp != NULL)
+    {
+      if (fp)
+       *opened_name = uname;
+      else
+       {
+         *opened_name = NULL;
+         xfree (uname);
+       }
+    }
+  else
+    xfree (uname);
+  return fp;
+}
+
+/* Open the file for writing, with the addition that the file is
+   opened "exclusively".  This means that, if the file already exists,
+   this function will *fail* and errno will be set to EEXIST.  If
+   BINARY is set, the file will be opened in binary mode, equivalent
+   to fopen's "wb".
+
+   If opening the file fails for any reason, including the file having
+   previously existed, this function returns NULL and sets errno
+   appropriately.  */
+   
+FILE *
+fopen_excl (const char *fname, int binary)
+{
+  int fd;
+#ifdef O_EXCL
+  int flags = O_WRONLY | O_CREAT | O_EXCL;
+# ifdef O_BINARY
+  if (binary)
+    flags |= O_BINARY;
+# endif
+  fd = open (fname, flags, 0666);
+  if (fd < 0)
+    return NULL;
+  return fdopen (fd, binary ? "wb" : "w");
+#else  /* not O_EXCL */
+  /* Manually check whether the file exists.  This is prone to race
+     conditions, but systems without O_EXCL haven't deserved
+     better.  */
+  if (file_exists_p (fname))
+    {
+      errno = EEXIST;
+      return NULL;
+    }
+  return fopen (fname, binary ? "wb" : "w");
+#endif /* not O_EXCL */
+}
  \f
  /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
     are missing, create them first.  In case any mkdir() call fails,
@@ -412,9 +579,7 @@ unique_name (const char *file, int allow_passthrough)
  int
  make_directory (const char *directory)
  {
-  int quit = 0;
-  int i;
-  int ret = 0;
+  int i, ret, quit = 0;
    char *dir;
  
    /* Make a copy of dir, to be able to write to it.  Otherwise, the
@@ -772,7 +937,7 @@ read_file (const char *file)
  
  #ifdef HAVE_MMAP
    {
-    struct stat buf;
+    struct_stat buf;
      if (fstat (fd, &buf) < 0)
        goto mmap_lose;
      fm->length = buf.st_size;
@@ -804,7 +969,7 @@ read_file (const char *file)
    fm->content = xmalloc (size);
    while (1)
      {
-      long nread;
+      wgint nread;
        if (fm->length > size / 2)
         {
           /* #### I'm not sure whether the whole exponential-growth
@@ -915,91 +1080,6 @@ merge_vecs (char **v1, char **v2)
    xfree (v2);
    return v1;
  }
-
-/* A set of simple-minded routines to store strings in a linked list.
-   This used to also be used for searching, but now we have hash
-   tables for that.  */
-
-/* It's a shame that these simple things like linked lists and hash
-   tables (see hash.c) need to be implemented over and over again.  It
-   would be nice to be able to use the routines from glib -- see
-   www.gtk.org for details.  However, that would make Wget depend on
-   glib, and I want to avoid dependencies to external libraries for
-   reasons of convenience and portability (I suspect Wget is more
-   portable than anything ever written for Gnome).  */
-
-/* Append an element to the list.  If the list has a huge number of
-   elements, this can get slow because it has to find the list's
-   ending.  If you think you have to call slist_append in a loop,
-   think about calling slist_prepend() followed by slist_nreverse().  */
-
-slist *
-slist_append (slist *l, const char *s)
-{
-  slist *newel = xnew (slist);
-  slist *beg = l;
-
-  newel->string = xstrdup (s);
-  newel->next = NULL;
-
-  if (!l)
-    return newel;
-  /* Find the last element.  */
-  while (l->next)
-    l = l->next;
-  l->next = newel;
-  return beg;
-}
-
-/* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
-
-slist *
-slist_prepend (slist *l, const char *s)
-{
-  slist *newel = xnew (slist);
-  newel->string = xstrdup (s);
-  newel->next = l;
-  return newel;
-}
-
-/* Destructively reverse L. */
-
-slist *
-slist_nreverse (slist *l)
-{
-  slist *prev = NULL;
-  while (l)
-    {
-      slist *next = l->next;
-      l->next = prev;
-      prev = l;
-      l = next;
-    }
-  return prev;
-}
-
-/* Is there a specific entry in the list?  */
-int
-slist_contains (slist *l, const char *s)
-{
-  for (; l; l = l->next)
-    if (!strcmp (l->string, s))
-      return 1;
-  return 0;
-}
-
-/* Free the whole slist.  */
-void
-slist_free (slist *l)
-{
-  while (l)
-    {
-      slist *n = l->next;
-      xfree (l->string);
-      xfree (l);
-      l = n;
-    }
-}
  \f
  /* Sometimes it's useful to create "sets" of strings, i.e. special
     hash tables where you want to store strings as keys and merely
@@ -1030,6 +1110,22 @@ string_set_contains (struct hash_table *ht, const char *s)
    return hash_table_contains (ht, s);
  }
  
+static int
+string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
+{
+  char ***arrayptr = (char ***) arg;
+  *(*arrayptr)++ = (char *) key;
+  return 0;
+}
+
+/* Convert the specified string set to array.  ARRAY should be large
+   enough to hold hash_table_count(ht) char pointers.  */
+
+void string_set_to_array (struct hash_table *ht, char **array)
+{
+  hash_table_map (ht, string_set_to_array_mapper, &array);
+}
+
  static int
  string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
  {
@@ -1061,11 +1157,11 @@ free_keys_and_values (struct hash_table *ht)
  }
  
  \f
-/* Engine for legible and legible_large_int; add thousand separators
-   to numbers printed in strings.  */
+/* Add thousand separators to a number already in string form.  Used
+   by with_thousand_seps and with_thousand_seps_large.  */
  
  static char *
-legible_1 (const char *repr)
+add_thousand_seps (const char *repr)
  {
    static char outbuf[48];
    int i, i1, mod;
@@ -1101,64 +1197,122 @@ legible_1 (const char *repr)
    return outbuf;
  }
  
-/* Legible -- return a static pointer to the legibly printed long.  */
+/* Return a static pointer to the number printed with thousand
+   separators inserted at the right places.  */
  
  char *
-legible (long l)
+with_thousand_seps (wgint l)
  {
    char inbuf[24];
    /* Print the number into the buffer.  */
    number_to_string (inbuf, l);
-  return legible_1 (inbuf);
+  return add_thousand_seps (inbuf);
  }
  
  /* Write a string representation of LARGE_INT NUMBER into the provided
-   buffer.  The buffer should be able to accept 24 characters,
-   including the terminating zero.
+   buffer.
  
     It would be dangerous to use sprintf, because the code wouldn't
     work on a machine with gcc-provided long long support, but without
-   libc support for "%lld".  However, such platforms will typically
-   not have snprintf and will use our version, which does support
-   "%lld" where long longs are available.  */
+   libc support for "%lld".  However, such old platforms
+   typically lack snprintf and will end up using our version, which
+   does support "%lld" wherever long longs are available.  */
  
  static void
-large_int_to_string (char *buffer, LARGE_INT number)
+large_int_to_string (char *buffer, int bufsize, LARGE_INT number)
  {
-  snprintf (buffer, 24, LARGE_INT_FMT, number);
+  snprintf (buffer, bufsize, LARGE_INT_FMT, number);
  }
  
-/* The same as legible(), but works on LARGE_INT.  */
+/* The same as with_thousand_seps, but works on LARGE_INT.  */
  
  char *
-legible_large_int (LARGE_INT l)
+with_thousand_seps_large (LARGE_INT l)
  {
    char inbuf[48];
-  large_int_to_string (inbuf, l);
-  return legible_1 (inbuf);
+  large_int_to_string (inbuf, sizeof (inbuf), l);
+  return add_thousand_seps (inbuf);
+}
+
+/* N, a byte quantity, is converted to a human-readable abbreviated
+   form a la sizes printed by `ls -lh'.  The result is written to a
+   static buffer, a pointer to which is returned.
+
+   Unlike `with_thousand_seps', this approximates to the nearest unit.
+   Quoting GNU libit: "Most people visually process strings of 3-4
+   digits effectively, but longer strings of digits are more prone to
+   misinterpretation.  Hence, converting to an abbreviated form
+   usually improves readability."
+
+   This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
+   original computer science meaning of "multiples of 1024".
+   Multiples of 1000 would be useless since Wget already adds thousand
+   separators for legibility.  We don't use the "*bibyte" names
+   invented in 1998, and seldom used in practice.  Wikipedia's entry
+   on kilobyte discusses this in some detail.  */
+
+char *
+human_readable (wgint n)
+{
+  /* These suffixes are compatible with those of GNU `ls -lh'. */
+  static char powers[] =
+    {
+      'K',                     /* kilobyte, 2^10 bytes */
+      'M',                     /* megabyte, 2^20 bytes */
+      'G',                     /* gigabyte, 2^30 bytes */
+      'T',                     /* terabyte, 2^40 bytes */
+      'P',                     /* petabyte, 2^50 bytes */
+      'E',                     /* exabyte,  2^60 bytes */
+    };
+  static char buf[8];
+  int i;
+
+  /* If the quantity is smaller than 1K, just print it. */
+  if (n < 1024)
+    {
+      snprintf (buf, sizeof (buf), "%d", (int) n);
+      return buf;
+    }
+
+  /* Loop over powers, dividing N with 1024 in each iteration.  This
+     works unchanged for all sizes of wgint, while still avoiding
+     non-portable `long double' arithmetic.  */
+  for (i = 0; i < countof (powers); i++)
+    {
+      /* At each iteration N is greater than the *subsequent* power.
+        That way N/1024.0 produces a decimal number in the units of
+        *this* power.  */
+      if ((n >> 10) < 1024 || i == countof (powers) - 1)
+       {
+         /* Must cast to long first because MS VC can't directly cast
+            __int64 to double.  (This is safe because N is known to
+            be <2**20.)  */
+         double val = (double) (long) n / 1024.0;
+         /* Print values smaller than 10 with one decimal digit, and
+            others without any decimals.  */
+         snprintf (buf, sizeof (buf), "%.*f%c",
+                   val < 10 ? 1 : 0, val, powers[i]);
+         return buf;
+       }
+      n >>= 10;
+    }
+  return NULL;                 /* unreached */
  }
  
-/* Count the digits in a (long) integer.  */
+/* Count the digits in the provided number.  Used to allocate space
+   when printing numbers.  */
+
  int
-numdigit (long number)
+numdigit (wgint number)
  {
    int cnt = 1;
    if (number < 0)
-    {
-      number = -number;
-      ++cnt;
-    }
-  while ((number /= 10) > 0)
+    ++cnt;                     /* accommodate '-' */
+  while ((number /= 10) != 0)
      ++cnt;
    return cnt;
  }
  
-/* A half-assed implementation of INT_MAX on machines that don't
-   bother to define one. */
-#ifndef INT_MAX
-# define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
-#endif
-
  #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
  #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
  
@@ -1173,7 +1327,7 @@ numdigit (long number)
  #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
  #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
  
-/* DIGITS_<11-20> are only used on machines with 64-bit longs. */
+/* DIGITS_<11-20> are only used on machines with 64-bit numbers. */
  
  #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
  #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
@@ -1185,13 +1339,72 @@ numdigit (long number)
  #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
  #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
  
-/* Print NUMBER to BUFFER in base 10.  This should be completely
-   equivalent to `sprintf(buffer, "%ld", number)', only much faster.
+/* It is annoying that we have three different syntaxes for 64-bit constants:
+    - nnnL for 64-bit systems, where they are of type long;
+    - nnnLL for 32-bit systems that support long long;
+    - nnnI64 for MS compiler on Windows, which doesn't support long long. */
+
+#if SIZEOF_LONG > 4
+/* If long is large enough, use long constants. */
+# define C10000000000 10000000000L
+# define C100000000000 100000000000L
+# define C1000000000000 1000000000000L
+# define C10000000000000 10000000000000L
+# define C100000000000000 100000000000000L
+# define C1000000000000000 1000000000000000L
+# define C10000000000000000 10000000000000000L
+# define C100000000000000000 100000000000000000L
+# define C1000000000000000000 1000000000000000000L
+#else
+# if SIZEOF_LONG_LONG != 0
+/* Otherwise, if long long is available, use long long constants. */
+#  define C10000000000 10000000000LL
+#  define C100000000000 100000000000LL
+#  define C1000000000000 1000000000000LL
+#  define C10000000000000 10000000000000LL
+#  define C100000000000000 100000000000000LL
+#  define C1000000000000000 1000000000000000LL
+#  define C10000000000000000 10000000000000000LL
+#  define C100000000000000000 100000000000000000LL
+#  define C1000000000000000000 1000000000000000000LL
+# else
+#  if defined(WINDOWS)
+/* Use __int64 constants under Windows. */
+#   define C10000000000 10000000000I64
+#   define C100000000000 100000000000I64
+#   define C1000000000000 1000000000000I64
+#   define C10000000000000 10000000000000I64
+#   define C100000000000000 100000000000000I64
+#   define C1000000000000000 1000000000000000I64
+#   define C10000000000000000 10000000000000000I64
+#   define C100000000000000000 100000000000000000I64
+#   define C1000000000000000000 1000000000000000000I64
+#  endif
+# endif
+#endif
+
+/* SPRINTF_WGINT lets number_to_string print pathological cases and odd
+   wgint sizes.  MS "I64" is a size prefix only, hence "%I64d". */
+#if SIZEOF_LONG >= SIZEOF_WGINT
+#  define SPRINTF_WGINT(buf, n) sprintf(buf, "%ld", (long) (n))
+#else
+# if SIZEOF_LONG_LONG >= SIZEOF_WGINT
+#   define SPRINTF_WGINT(buf, n) sprintf(buf, "%lld", (long long) (n))
+# else
+#  ifdef WINDOWS
+#   define SPRINTF_WGINT(buf, n) sprintf(buf, "%I64d", (__int64) (n))
+#  endif
+# endif
+#endif
+
+/* Print NUMBER to BUFFER in base 10.  This is equivalent to
+   `sprintf(buffer, "%lld", (long long) number)', only typically much
+   faster and portable to machines without long long.
  
     The speedup may make a difference in programs that frequently
     convert numbers to strings.  Some implementations of sprintf,
     particularly the one in GNU libc, have been known to be extremely
-   slow compared to this function.
+   slow when converting integers to strings.
  
     Return the pointer to the location where the terminating zero was
     printed.  (Equivalent to calling buffer+strlen(buffer) after the
@@ -1204,25 +1417,25 @@ numdigit (long number)
     terminating '\0'.  */
  
  char *
-number_to_string (char *buffer, long number)
+number_to_string (char *buffer, wgint number)
  {
    char *p = buffer;
-  long n = number;
+  wgint n = number;
  
-#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
+#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
    /* We are running in a strange or misconfigured environment.  Let
       sprintf cope with it.  */
-  sprintf (buffer, "%ld", n);
+  SPRINTF_WGINT (buffer, n);
    p += strlen (buffer);
-#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
+#else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
  
    if (n < 0)
      {
-      if (n < -INT_MAX)
+      if (n < -WGINT_MAX)
         {
           /* We cannot print a '-' and assign -n to n because -n would
              overflow.  Let sprintf deal with this border case.  */
-         sprintf (buffer, "%ld", n);
+         SPRINTF_WGINT (buffer, n);
           p += strlen (buffer);
           return p;
         }
@@ -1240,24 +1453,26 @@ number_to_string (char *buffer, long number)
    else if (n < 10000000)             { DIGITS_7 (1000000); }
    else if (n < 100000000)            { DIGITS_8 (10000000); }
    else if (n < 1000000000)           { DIGITS_9 (100000000); }
-#if SIZEOF_LONG == 4
+#if SIZEOF_WGINT == 4
+  /* wgint is four bytes long: we're done. */
    /* ``if (1)'' serves only to preserve editor indentation. */
    else if (1)                        { DIGITS_10 (1000000000); }
-#else  /* SIZEOF_LONG != 4 */
-  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
-  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
-  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
-  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
-  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
-  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
-  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
-  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
-  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
-  else                               { DIGITS_19 (1000000000000000000L); }
-#endif /* SIZEOF_LONG != 4 */
+#else
+  /* wgint is 64 bits long -- make sure to process all the digits. */
+  else if (n < C10000000000)         { DIGITS_10 (1000000000); }
+  else if (n < C100000000000)        { DIGITS_11 (C10000000000); }
+  else if (n < C1000000000000)       { DIGITS_12 (C100000000000); }
+  else if (n < C10000000000000)      { DIGITS_13 (C1000000000000); }
+  else if (n < C100000000000000)     { DIGITS_14 (C10000000000000); }
+  else if (n < C1000000000000000)    { DIGITS_15 (C100000000000000); }
+  else if (n < C10000000000000000)   { DIGITS_16 (C1000000000000000); }
+  else if (n < C100000000000000000)  { DIGITS_17 (C10000000000000000); }
+  else if (n < C1000000000000000000) { DIGITS_18 (C100000000000000000); }
+  else                               { DIGITS_19 (C1000000000000000000); }
+#endif
  
    *p = '\0';
-#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
+#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
  
    return p;
  }
@@ -1284,6 +1499,50 @@ number_to_string (char *buffer, long number)
  #undef DIGITS_17
  #undef DIGITS_18
  #undef DIGITS_19
+
+#define RING_SIZE 3
+
+/* Print NUMBER to a statically allocated string and return a pointer
+   to the printed representation.
+
+   This function is intended to be used in conjunction with printf.
+   It is hard to portably print wgint values:
+    a) you cannot use printf("%ld", number) because wgint can be long
+       long on 32-bit machines with LFS.
+    b) you cannot use printf("%lld", number) because NUMBER could be
+       long on 32-bit machines without LFS, or on 64-bit machines,
+       which do not require LFS.  Also, Windows doesn't support %lld.
+    c) you cannot use printf("%jd", (intmax_t) number) because not all
+       versions of printf support "%jd", the most notable being the one
+       on Windows.
+    d) you cannot #define WGINT_FMT to the appropriate format and use
+       printf(WGINT_FMT, number) because that would break translations
+       for user-visible messages, such as printf("Downloaded: %d
+       bytes\n", number).
+
+   What you should use instead is printf("%s", number_to_static_string
+   (number)).
+
+   CAVEAT: since the function returns pointers to static data, you
+   must be careful to copy its result before calling it again.
+   However, to make it more useful with printf, the function maintains
+   an internal ring of static buffers to return.  That way things like
+   printf("%s %s", number_to_static_string (num1),
+   number_to_static_string (num2)) work as expected.  Three buffers
+   are currently used, which means that "%s %s %s" will work, but "%s
+   %s %s %s" won't.  If you need to print more than three wgints,
+   bump the RING_SIZE (or rethink your message.)  */
+
+char *
+number_to_static_string (wgint number)
+{
+  static char ring[RING_SIZE][24];
+  static int ringpos;
+  char *buf = ring[ringpos];
+  number_to_string (buf, number);
+  ringpos = (ringpos + 1) % RING_SIZE;
+  return buf;
+}
  \f
  /* Support for timers. */
  
@@ -1295,11 +1554,7 @@ number_to_string (char *buffer, long number)
     only one of the above constants will be defined.  Virtually all
     modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
     use TIMER_WINDOWS.  TIMER_TIME is a catch-all method for
-   non-Windows systems without gettimeofday.
-
-   #### Perhaps we should also support ftime(), which exists on old
-   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
-   C, if memory serves me.)  */
+   non-Windows systems without gettimeofday.  */
  
  #ifdef WINDOWS
  # define TIMER_WINDOWS
@@ -1320,10 +1575,16 @@ typedef time_t wget_sys_time;
  #endif
  
  #ifdef TIMER_WINDOWS
-typedef ULARGE_INTEGER wget_sys_time;
+typedef union {
+  DWORD lores;          /* In case GetTickCount is used */
+  LARGE_INTEGER hires;  /* In case high-resolution timer is used */
+} wget_sys_time;
  #endif
  
  struct wget_timer {
+  /* Whether the start time has been initialized. */
+  int initialized;
+
    /* The starting point in time which, subtracted from the current
       time, yields elapsed time. */
    wget_sys_time start;
@@ -1337,13 +1598,53 @@ struct wget_timer {
    double elapsed_pre_start;
  };
  
-/* Allocate a timer.  It is not legal to do anything with a freshly
-   allocated timer, except call wtimer_reset() or wtimer_delete().  */
+#ifdef TIMER_WINDOWS
+
+/* Whether high-resolution timers are used.  Set by wtimer_initialize_once
+   the first time wtimer_allocate is called. */
+static int using_hires_timers;
+
+/* Frequency of high-resolution timers -- number of updates per
+   millisecond.  Calculated the first time wtimer_allocate is called
+   provided that high-resolution timers are available. */
+static double hires_millisec_freq;
+
+/* The first time a timer is created, determine whether to use
+   high-resolution timers. */
+
+static void
+wtimer_initialize_once (void)
+{
+  static int init_done;
+  if (!init_done)
+    {
+      LARGE_INTEGER freq;
+      init_done = 1;
+      freq.QuadPart = 0;
+      QueryPerformanceFrequency (&freq);
+      if (freq.QuadPart != 0)
+        {
+          using_hires_timers = 1;
+          hires_millisec_freq = (double) freq.QuadPart / 1000.0;
+        }
+     }
+}
+#endif /* TIMER_WINDOWS */
+
+/* Allocate a timer.  Calling wtimer_read on the timer will return
+   zero.  It is not legal to call wtimer_update with a freshly
+   allocated timer -- use wtimer_reset first.  */
  
  struct wget_timer *
  wtimer_allocate (void)
  {
    struct wget_timer *wt = xnew (struct wget_timer);
+  xzero (*wt);
+
+#ifdef TIMER_WINDOWS
+  wtimer_initialize_once ();
+#endif
+
    return wt;
  }
  
@@ -1380,29 +1681,24 @@ wtimer_sys_set (wget_sys_time *wst)
  #endif
  
  #ifdef TIMER_WINDOWS
-  /* We use GetSystemTime to get the elapsed time.  MSDN warns that
-     system clock adjustments can skew the output of GetSystemTime
-     when used as a timer and gives preference to GetTickCount and
-     high-resolution timers.  But GetTickCount can overflow, and hires
-     timers are typically used for profiling, not for regular time
-     measurement.  Since we handle clock skew anyway, we just use
-     GetSystemTime.  */
-  FILETIME ft;
-  SYSTEMTIME st;
-  GetSystemTime (&st);
-
-  /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
-     FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
-     arithmetic on that.  */
-  SystemTimeToFileTime (&st, &ft);
-  wst->HighPart = ft.dwHighDateTime;
-  wst->LowPart  = ft.dwLowDateTime;
+  if (using_hires_timers)
+    {
+      QueryPerformanceCounter (&wst->hires);
+    }
+  else
+    {
+      /* Where hires counters are not available, use GetTickCount rather
+         than GetSystemTime, because it is unaffected by clock skew and
+         simpler to use.  Note that overflows don't affect us because we
+         never use absolute values of the ticker, only the differences.  */
+      wst->lores = GetTickCount ();
+    }
  #endif
  }
  
  /* Reset timer WT.  This establishes the starting point from which
-   wtimer_elapsed() will return the number of elapsed
-   milliseconds.  It is allowed to reset a previously used timer.  */
+   wtimer_update() and wtimer_read() measure the number of elapsed
+   milliseconds.  It is allowed to reset a previously used timer.  */
  
  void
  wtimer_reset (struct wget_timer *wt)
@@ -1411,6 +1707,7 @@ wtimer_reset (struct wget_timer *wt)
    wtimer_sys_set (&wt->start);
    wt->elapsed_last = 0;
    wt->elapsed_pre_start = 0;
+  wt->initialized = 1;
  }
  
  static double
@@ -1426,24 +1723,29 @@ wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
  #endif
  
  #ifdef WINDOWS
-  /* VC++ 6 doesn't support direct cast of uint64 to double.  To work
-     around this, we subtract, then convert to signed, then finally to
-     double.  */
-  return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
+  if (using_hires_timers)
+    return (wst1->hires.QuadPart - wst2->hires.QuadPart) / hires_millisec_freq;
+  else
+    return wst1->lores - wst2->lores;
  #endif
  }
  
-/* Return the number of milliseconds elapsed since the timer was last
-   reset.  It is allowed to call this function more than once to get
-   increasingly higher elapsed values.  These timers handle clock
-   skew.  */
+/* Update the timer's elapsed interval.  This function causes the
+   timer to call gettimeofday (or time(), etc.) to update its idea of
+   current time.  To get the elapsed interval in milliseconds, use
+   wtimer_read.
  
-double
-wtimer_elapsed (struct wget_timer *wt)
+   This function handles clock skew, i.e. time that moves backwards is
+   ignored.  */
+
+void
+wtimer_update (struct wget_timer *wt)
  {
    wget_sys_time now;
    double elapsed;
  
+  assert (wt->initialized != 0);
+
    wtimer_sys_set (&now);
    elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
  
@@ -1468,7 +1770,22 @@ wtimer_elapsed (struct wget_timer *wt)
      }
  
    wt->elapsed_last = elapsed;
-  return elapsed;
+}
+
+/* Return the elapsed time in milliseconds between the last call to
+   wtimer_reset and the last call to wtimer_update.
+
+   A typical use of the timer interface would be:
+
+       struct wget_timer *timer = wtimer_allocate ();
+       wtimer_reset (timer);
+       ... do something that takes a while ...
+       wtimer_update (timer);
+       double msecs = wtimer_read (timer);  */
+
+double
+wtimer_read (const struct wget_timer *wt)
+{
+  return wt->elapsed_last;
  }
  
  /* Return the assessed granularity of the timer implementation, in
@@ -1491,9 +1808,10 @@ wtimer_granularity (void)
  #endif
  
  #ifdef TIMER_WINDOWS
-  /* According to MSDN, GetSystemTime returns a broken-down time
-     structure the smallest member of which are milliseconds.  */
-  return 1;
+  if (using_hires_timers)
+    return 1.0 / hires_millisec_freq;
+  else
+    return 10;  /* according to MSDN */
  #endif
  }
  \f
@@ -1580,9 +1898,7 @@ determine_screen_width (void)
  {
    /* If there's a way to get the terminal size using POSIX
       tcgetattr(), somebody please tell me.  */
-#ifndef TIOCGWINSZ
-  return 0;
-#else  /* TIOCGWINSZ */
+#ifdef TIOCGWINSZ
    int fd;
    struct winsize wsz;
  
@@ -1594,7 +1910,16 @@ determine_screen_width (void)
      return 0;                  /* most likely ENOTTY */
  
    return wsz.ws_col;
-#endif /* TIOCGWINSZ */
+#else  /* not TIOCGWINSZ */
+# ifdef WINDOWS
+  CONSOLE_SCREEN_BUFFER_INFO csbi;
+  if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
+    return 0;
+  return csbi.dwSize.X;
+# else /* neither WINDOWS nor TIOCGWINSZ */
+  return 0;
+#endif /* neither WINDOWS nor TIOCGWINSZ */
+#endif /* not TIOCGWINSZ */
  }
  
  /* Return a random number between 0 and MAX-1, inclusive.
@@ -1659,40 +1984,6 @@ random_float (void)
    int rnd3 = random_number (1000);
    return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
  }
-
-#if 0
-/* A debugging function for checking whether an MD5 library works. */
-
-#include "gen-md5.h"
-
-char *
-debug_test_md5 (char *buf)
-{
-  unsigned char raw[16];
-  static char res[33];
-  unsigned char *p1;
-  char *p2;
-  int cnt;
-  ALLOCA_MD5_CONTEXT (ctx);
-
-  gen_md5_init (ctx);
-  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
-  gen_md5_finish (ctx, raw);
-
-  p1 = raw;
-  p2 = res;
-  cnt = 16;
-  while (cnt--)
-    {
-      *p2++ = XNUM_TO_digit (*p1 >> 4);
-      *p2++ = XNUM_TO_digit (*p1 & 0xf);
-      ++p1;
-    }
-  *p2 = '\0';
-
-  return res;
-}
-#endif
  \f
  /* Implementation of run_with_timeout, a generic timeout-forcing
     routine for systems with Unix-like signal handling.  */
@@ -1863,8 +2154,9 @@ xsleep (double seconds)
  #ifdef HAVE_NANOSLEEP
    /* nanosleep is the preferred interface because it offers high
       accuracy and, more importantly, because it allows us to reliably
-     restart after having been interrupted by a signal such as
-     SIGWINCH.  */
+     restart after receiving a signal such as SIGWINCH.  (There was an
+     actual Debian bug report about --limit-rate malfunctioning while
+     the terminal was being resized.)  */
    struct timespec sleep, remaining;
    sleep.tv_sec = (long) seconds;
    sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);
@@ -1875,11 +2167,10 @@ xsleep (double seconds)
  #else  /* not HAVE_NANOSLEEP */
  #ifdef HAVE_USLEEP
    /* If usleep is available, use it in preference to select.  */
-  if (seconds > 1000)
+  if (seconds >= 1)
      {
-      /* usleep apparently accepts unsigned long, which means it can't
-        sleep longer than ~70 min (35min if signed).  If the period
-        is larger than what usleep can safely handle, use sleep
+      /* On some systems, usleep cannot handle values larger than
+        1,000,000 microseconds.  For periods of a second or more, use sleep
          first, then add usleep for subsecond accuracy.  */
        sleep (seconds);
        seconds -= (long) seconds;