[svn] Move fnmatch() to cmpt.c and don't use it under GNU libc.

[wget] / src / utils.c
diff --git a/src/utils.c b/src/utils.c

index a67ec8de09514b1ec73d6a73288715609ef9c1e2..c78b170866a047add504f679206de31c10f3e6c6 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -92,7 +92,6 @@ so, delete this exception statement from your version.  */
  
  #include "wget.h"
  #include "utils.h"
-#include "fnmatch.h"
  #include "hash.h"
  
  #ifndef errno
@@ -241,7 +240,7 @@ static void
  register_ptr (void *ptr, const char *file, int line)
  {
    int i;
-  for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
+  for (i = 0; i < countof (malloc_debug); i++)
      if (malloc_debug[i].ptr == NULL)
        {
         malloc_debug[i].ptr = ptr;
@@ -259,7 +258,7 @@ static void
  unregister_ptr (void *ptr)
  {
    int i;
-  for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
+  for (i = 0; i < countof (malloc_debug); i++)
      if (malloc_debug[i].ptr == ptr)
        {
         malloc_debug[i].ptr = NULL;
@@ -279,7 +278,7 @@ print_malloc_debug_stats (void)
    int i;
    printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
           malloc_count, free_count, malloc_count - free_count);
-  for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
+  for (i = 0; i < countof (malloc_debug); i++)
      if (malloc_debug[i].ptr != NULL)
        printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
               malloc_debug[i].file, malloc_debug[i].line);
@@ -470,7 +469,7 @@ fork_to_background (void)
  
    if (!opt.lfilename)
      {
-      opt.lfilename = unique_name (DEFAULT_LOGFILE);
+      opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
        changedp = 1;
      }
    pid = fork ();
@@ -574,45 +573,63 @@ file_size (const char *filename)
       that way we can also verify whether the file is readable.
       Inspired by the POST patch by Arnaud Wylie.  */
    FILE *fp = fopen (filename, "rb");
+  if (!fp)
+    return -1;
    fseek (fp, 0, SEEK_END);
    size = ftell (fp);
    fclose (fp);
    return size;
  }
  
-/* Return a unique filename, given a prefix and count */
+/* Stat file names of the form PREFIX.1, PREFIX.2, etc., until one that
+   doesn't exist is found.  Return a freshly allocated copy of the
+   unused file name.  */
+
  static char *
-unique_name_1 (const char *fileprefix, int count)
+unique_name_1 (const char *prefix)
  {
-  char *filename;
+  int count = 1;
+  int plen = strlen (prefix);
+  char *template = (char *)alloca (plen + 1 + 24);
+  char *template_tail = template + plen;
  
-  if (count)
-    {
-      filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
-      sprintf (filename, "%s.%d", fileprefix, count);
-    }
-  else
-    filename = xstrdup (fileprefix);
+  memcpy (template, prefix, plen);
+  *template_tail++ = '.';
  
-  if (!file_exists_p (filename))
-    return filename;
-  else
-    {
-      xfree (filename);
-      return NULL;
-    }
+  do
+    number_to_string (template_tail, count++);
+  while (file_exists_p (template));
+
+  return xstrdup (template);
  }
  
-/* Return a unique file name, based on PREFIX.  */
+/* Return a unique file name, based on FILE.
+
+   More precisely, if FILE doesn't exist, it is returned unmodified.
+   If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
+   file name that doesn't exist is returned.
+
+   The resulting file is not created, only verified that it didn't
+   exist at the point in time when the function was called.
+   Therefore, where security matters, don't rely on the file name
+   returned by this function staying unused until you open it with
+   O_EXCL or something.
+
+   If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
+   string.  Otherwise, it may return FILE if the file doesn't exist
+   (and therefore doesn't need changing).  */
+
  char *
-unique_name (const char *prefix)
+unique_name (const char *file, int allow_passthrough)
  {
-  char *file = NULL;
-  int count = 0;
+  /* If the FILE itself doesn't exist, return it without
+     modification. */
+  if (!file_exists_p (file))
+    return allow_passthrough ? (char *)file : xstrdup (file);
  
-  while (!file)
-    file = unique_name_1 (prefix, count++);
-  return file;
+  /* Otherwise, find a numeric suffix that results in unused file name
+     and return it.  */
+  return unique_name_1 (file);
  }
  \f
  /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
@@ -856,6 +873,18 @@ suffix (const char *str)
      return NULL;
  }
  
+/* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
+   `]').  */
+
+int
+has_wildcards_p (const char *s)
+{
+  for (; *s; s++)
+    if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
+      return 1;
+  return 0;
+}
+
  /* Return non-zero if FNAME ends with a typical HTML suffix.  The
     following (case-insensitive) suffixes are presumed to be HTML files:
     
@@ -882,8 +911,8 @@ has_html_suffix_p (const char *fname)
  }
  
  /* Read a line from FP and return the pointer to freshly allocated
-   storage.  The stoarage space is obtained through malloc() and
-   should be freed with free() when it is no longer needed.
+   storage.  The storage space is obtained through malloc() and should
+   be freed with free() when it is no longer needed.
  
     The length of the line is not limited, except by available memory.
     The newline character at the end of line is retained.  The line is
@@ -1017,7 +1046,7 @@ read_file (const char *file)
           /* Normally, we grow SIZE exponentially to make the number
               of calls to read() and realloc() logarithmic in relation
               to file size.  However, read() can read an amount of data
-             smaller than requested, and it would be unreasonably to
+             smaller than requested, and it would be unreasonable to
               double SIZE every time *something* was read.  Therefore,
               we double SIZE only when the length exceeds half of the
               entire allocated size.  */
@@ -1551,11 +1580,11 @@ struct wget_timer {
  
    /* The most recent elapsed time, calculated by wtimer_elapsed().
       Measured in milliseconds.  */
-  long elapsed_last;
+  double elapsed_last;
  
    /* Approximately, the time elapsed between the true start of the
       measurement and the time represented by START.  */
-  long elapsed_pre_start;
+  double elapsed_pre_start;
  };
  
  /* Allocate a timer.  It is not legal to do anything with a freshly
@@ -1602,9 +1631,20 @@ wtimer_sys_set (wget_sys_time *wst)
  #endif
  
  #ifdef TIMER_WINDOWS
+  /* We use GetSystemTime to get the elapsed time.  MSDN warns that
+     system clock adjustments can skew the output of GetSystemTime
+     when used as a timer and gives preference to GetTickCount and
+     high-resolution timers.  But GetTickCount can overflow, and hires
+     timers are typically used for profiling, not for regular time
+     measurement.  Since we handle clock skew anyway, we just use
+     GetSystemTime.  */
    FILETIME ft;
    SYSTEMTIME st;
    GetSystemTime (&st);
+
+  /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
+     FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
+     arithmetic on that.  */
    SystemTimeToFileTime (&st, &ft);
    wst->HighPart = ft.dwHighDateTime;
    wst->LowPart  = ft.dwLowDateTime;
@@ -1624,12 +1664,12 @@ wtimer_reset (struct wget_timer *wt)
    wt->elapsed_pre_start = 0;
  }
  
-static long
+static double
  wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
  {
  #ifdef TIMER_GETTIMEOFDAY
-  return ((wst1->tv_sec - wst2->tv_sec) * 1000
-         + (wst1->tv_usec - wst2->tv_usec) / 1000);
+  return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
+         + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
  #endif
  
  #ifdef TIMER_TIME
@@ -1637,19 +1677,23 @@ wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
  #endif
  
  #ifdef WINDOWS
-  return (long)(wst1->QuadPart - wst2->QuadPart) / 10000;
+  /* VC++ 6 doesn't support direct cast of uint64 to double.  To work
+     around this, we subtract, then convert to signed, then finally to
+     double.  */
+  return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
  #endif
  }
  
  /* Return the number of milliseconds elapsed since the timer was last
     reset.  It is allowed to call this function more than once to get
-   increasingly higher elapsed values.  */
+   increasingly higher elapsed values.  These timers handle clock
+   skew.  */
  
-long
+double
  wtimer_elapsed (struct wget_timer *wt)
  {
    wget_sys_time now;
-  long elapsed;
+  double elapsed;
  
    wtimer_sys_set (&now);
    elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
@@ -1678,27 +1722,28 @@ wtimer_elapsed (struct wget_timer *wt)
    return elapsed;
  }
  
-/* Return the assessed granularity of the timer implementation.  This
-   is important for certain code that tries to deal with "zero" time
-   intervals.  */
+/* Return the assessed granularity of the timer implementation, in
+   milliseconds.  This is used by code that tries to substitute a
+   better value for timers that have returned zero.  */
  
-long
+double
  wtimer_granularity (void)
  {
  #ifdef TIMER_GETTIMEOFDAY
-  /* Granularity of gettimeofday is hugely architecture-dependent.
-     However, it appears that on modern machines it is better than
-     1ms.  */
-  return 1;
+  /* Granularity of gettimeofday varies wildly between architectures.
+     However, it appears that on modern machines it tends to be better
+     than 1ms.  Assume 100 usecs.  (Perhaps the configure process
+     could actually measure this?)  */
+  return 0.1;
  #endif
  
  #ifdef TIMER_TIME
-  /* This is clear. */
    return 1000;
  #endif
  
  #ifdef TIMER_WINDOWS
-  /* ? */
+  /* According to MSDN, GetSystemTime returns a broken-down time
+     structure the smallest member of which is milliseconds.  */
    return 1;
  #endif
  }
@@ -1815,7 +1860,11 @@ determine_screen_width (void)
     This uses rand() for portability.  It has been suggested that
     random() offers better randomness, but this is not required for
     Wget, so I chose to go for simplicity and use rand
-   unconditionally.  */
+   unconditionally.
+
+   DO NOT use this for cryptographic purposes.  It is only meant to be
+   used in situations where quality of the random numbers returned
+   doesn't really matter.  */
  
  int
  random_number (int max)
@@ -1846,6 +1895,22 @@ random_number (int max)
    return (int)bounded;
  }
  
+/* Return a random uniformly distributed floating point number in the
+   [0, 1) range.  The precision of returned numbers is 9 digits.
+
+   Modify this to use erand48() where available!  */
+
+double
+random_float (void)
+{
+  /* We can't rely on any specific value of RAND_MAX, but I'm pretty
+     sure it's greater than 1000.  */
+  int rnd1 = random_number (1000);
+  int rnd2 = random_number (1000);
+  int rnd3 = random_number (1000);
+  return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
+}
+
  #if 0
  /* A debugging function for checking whether an MD5 library works. */
  
@@ -1870,8 +1935,8 @@ debug_test_md5 (char *buf)
    cnt = 16;
    while (cnt--)
      {
-      *p2++ = XDIGIT_TO_xchar (*p1 >> 4);
-      *p2++ = XDIGIT_TO_xchar (*p1 & 0xf);
+      *p2++ = XNUM_TO_digit (*p1 >> 4);
+      *p2++ = XNUM_TO_digit (*p1 & 0xf);
        ++p1;
      }
    *p2 = '\0';
@@ -1880,8 +1945,9 @@ debug_test_md5 (char *buf)
  }
  #endif
  \f
-/* Implementation of run_with_timeout, a generic timeout handler for
-   systems with Unix-like signal handling.  */
+/* Implementation of run_with_timeout, a generic timeout-forcing
+   routine for systems with Unix-like signal handling.  */
+
  #ifdef USE_SIGNAL_TIMEOUT
  # ifdef HAVE_SIGSETJMP
  #  define SETJMP(env) sigsetjmp (env, 1)
@@ -1907,22 +1973,93 @@ abort_run_with_timeout (int sig)
       if we longjumped out of the handler at this point, SIGALRM would
       remain blocked.  We must unblock it manually. */
    int mask = siggetmask ();
-  mask &= ~sigmask(SIGALRM);
+  mask &= ~sigmask (SIGALRM);
    sigsetmask (mask);
  
    /* Now it's safe to longjump. */
    longjmp (run_with_timeout_env, -1);
  }
  # endif /* not HAVE_SIGSETJMP */
-#endif /* USE_SIGNAL_TIMEOUT */
+
+/* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
+   setitimer where available, alarm otherwise.
+
+   TIMEOUT should be non-zero.  If the timeout value is so small that
+   it would be rounded to zero, it is rounded to the least legal value
+   instead (1us for setitimer, 1s for alarm).  That ensures that
+   SIGALRM will be delivered in all cases.  */
+
+static void
+alarm_set (double timeout)
+{
+#ifdef ITIMER_REAL
+  /* Use the modern itimer interface. */
+  struct itimerval itv;
+  memset (&itv, 0, sizeof (itv));
+  itv.it_value.tv_sec = (long) timeout;
+  itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout);
+  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
+    /* Ensure that we wait for at least the minimum interval.
+       Specifying zero would mean "wait forever".  */
+    itv.it_value.tv_usec = 1;
+  setitimer (ITIMER_REAL, &itv, NULL);
+#else  /* not ITIMER_REAL */
+  /* Use the old alarm() interface. */
+  int secs = (int) timeout;
+  if (secs == 0)
+    /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
+       because alarm(0) means "never deliver the alarm", i.e. "wait
+       forever", which is not what someone who specifies a 0.5s
+       timeout would expect.  */
+    secs = 1;
+  alarm (secs);
+#endif /* not ITIMER_REAL */
+}
+
+/* Cancel the alarm set with alarm_set. */
+
+static void
+alarm_cancel (void)
+{
+#ifdef ITIMER_REAL
+  struct itimerval disable;
+  memset (&disable, 0, sizeof (disable));
+  setitimer (ITIMER_REAL, &disable, NULL);
+#else  /* not ITIMER_REAL */
+  alarm (0);
+#endif /* not ITIMER_REAL */
+}
+
+/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
+   seconds.  Returns non-zero if the function was interrupted with a
+   timeout, zero otherwise.
+
+   This works by setting up SIGALRM to be delivered in TIMEOUT seconds
+   using setitimer() or alarm().  The timeout is enforced by
+   longjumping out of the SIGALRM handler.  This has several
+   advantages compared to the traditional approach of relying on
+   signals causing system calls to exit with EINTR:
+
+     * The callback function is *forcibly* interrupted after the
+       timeout expires, (almost) regardless of what it was doing and
+       whether it was in a syscall.  For example, a calculation that
+       takes a long time is interrupted as reliably as an IO
+       operation.
+
+     * It works with both SYSV and BSD signals because it doesn't
+       depend on the default setting of SA_RESTART.
+
+     * It doesn't require special handler setup beyond a simple call to
+       signal().  (It does use sigsetjmp/siglongjmp, but they're
+       optional.)
+
+   The only downside is that, if FUN allocates internal resources that
+   are normally freed prior to exit from the function, they will be
+   lost in case of timeout.  */
  
  int
-run_with_timeout (long timeout, void (*fun) (void *), void *arg)
+run_with_timeout (double timeout, void (*fun) (void *), void *arg)
  {
-#ifndef USE_SIGNAL_TIMEOUT
-  fun (arg);
-  return 0;
-#else
    int saved_errno;
  
    if (timeout == 0)
@@ -1938,16 +2075,30 @@ run_with_timeout (long timeout, void (*fun) (void *), void *arg)
        signal (SIGALRM, SIG_DFL);
        return 1;
      }
-  alarm (timeout);
+  alarm_set (timeout);
    fun (arg);
  
    /* Preserve errno in case alarm() or signal() modifies it. */
    saved_errno = errno;
-  alarm (0);
+  alarm_cancel ();
    signal (SIGALRM, SIG_DFL);
    errno = saved_errno;
  
    return 0;
-#endif
  }
  
+#else  /* not USE_SIGNAL_TIMEOUT */
+
+#ifndef WINDOWS
+/* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
+   define it under Windows, because Windows has its own version of
+   run_with_timeout that uses threads.  */
+
+int
+run_with_timeout (double timeout, void (*fun) (void *), void *arg)
+{
+  fun (arg);
+  return 0;
+}
+#endif /* not WINDOWS */
+#endif /* not USE_SIGNAL_TIMEOUT */