[svn] New option --ignore-case for case-insensitive matching.

[wget] / src / utils.c
diff --git a/src/utils.c b/src/utils.c

index d1b7d0b45b3d06b9178b261013f20f697184b20c..991aaf4befa4fd6735646d627016df736eb06384 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,5 +1,5 @@
  /* Various utility functions.
-   Copyright (C) 2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2005 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
@@ -14,8 +14,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+along with Wget; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  
  In addition, as a special exception, the Free Software Foundation
  gives permission to link the code of its release of Wget with the
@@ -58,9 +58,7 @@ so, delete this exception statement from your version.  */
  #include <fcntl.h>
  #include <assert.h>
  #include <stdarg.h>
-#ifdef HAVE_LOCALE_H
-# include <locale.h>
-#endif
+#include <locale.h>
  
  /* For TIOCGWINSZ and friends: */
  #ifdef HAVE_SYS_IOCTL_H
@@ -617,6 +615,28 @@ file_merge (const char *base, const char *file)
    return result;
  }
  \f
+/* Like fnmatch, but compares PATTERN and STRING case-insensitively.  */
+
+int
+fnmatch_nocase (const char *pattern, const char *string, int flags)
+{
+#ifdef FNM_CASEFOLD
+  return fnmatch (pattern, string, flags | FNM_CASEFOLD);
+#else
+  /* No FNM_CASEFOLD: run fnmatch on lower-cased alloca copies of both. */
+  char *patcopy = (char *) alloca (strlen (pattern) + 1);
+  char *strcopy = (char *) alloca (strlen (string) + 1);
+  char *p;
+  for (p = patcopy; *pattern; pattern++, p++)
+    *p = TOLOWER (*pattern);
+  *p = '\0';
+  for (p = strcopy; *string; string++, p++)
+    *p = TOLOWER (*string);
+  *p = '\0';
+  return fnmatch (patcopy, strcopy, flags);
+#endif
+}
+
  static bool in_acclist (const char *const *, const char *, bool);
  
  /* Determine whether a file is acceptable to be followed, according to
@@ -644,28 +664,34 @@ acceptable (const char *s)
  }
  
  /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
-   `/something', frontcmp() will return 1 only if S2 begins with
-   `/something'.  Otherwise, 0 is returned.  */
+   `/something', frontcmp() will return true only if S2 begins with
+   `/something'.  */
  bool
  frontcmp (const char *s1, const char *s2)
  {
-  for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
+  if (!opt.ignore_case)
+    for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
+  else
+    for (; *s1 && *s2 && (TOLOWER (*s1) == TOLOWER (*s2)); ++s1, ++s2);
    return *s1 == '\0';
  }
  
  /* Iterate through STRLIST, and return the first element that matches
     S, through wildcards or front comparison (as appropriate).  */
  static char *
-proclist (char **strlist, const char *s, enum accd flags)
+proclist (char **strlist, const char *s)
  {
    char **x;
+  int (*matcher) (const char *, const char *, int)
+    = opt.ignore_case ? fnmatch_nocase : fnmatch;
+
    for (x = strlist; *x; x++)
      {
-      /* Remove leading '/' if ALLABS */
-      char *p = *x + ((flags & ALLABS) && (**x == '/'));
+      /* Remove leading '/' */
+      char *p = *x + (**x == '/');
        if (has_wildcards_p (p))
         {
-         if (fnmatch (p, s, FNM_PATHNAME) == 0)
+         if (matcher (p, s, FNM_PATHNAME) == 0)
             break;
         }
        else
@@ -680,22 +706,23 @@ proclist (char **strlist, const char *s, enum accd flags)
  /* Returns whether DIRECTORY is acceptable for download, wrt the
     include/exclude lists.
  
-   If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
-   and absolute paths may be freely intermixed.  */
+   The leading `/' is ignored in paths; relative and absolute paths
+   may be freely intermixed.  */
+
  bool
-accdir (const char *directory, enum accd flags)
+accdir (const char *directory)
  {
    /* Remove starting '/'.  */
-  if (flags & ALLABS && *directory == '/')
+  if (*directory == '/')
      ++directory;
    if (opt.includes)
      {
-      if (!proclist (opt.includes, directory, flags))
+      if (!proclist (opt.includes, directory))
         return false;
      }
    if (opt.excludes)
      {
-      if (proclist (opt.excludes, directory, flags))
+      if (proclist (opt.excludes, directory))
         return false;
      }
    return true;
@@ -750,21 +777,24 @@ in_acclist (const char *const *accepts, const char *s, bool backward)
      {
        if (has_wildcards_p (*accepts))
         {
-         /* fnmatch returns 0 if the pattern *does* match the
-            string.  */
-         if (fnmatch (*accepts, s, 0) == 0)
+         int res = opt.ignore_case
+           ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
+         /* fnmatch returns 0 if the pattern *does* match the string.  */
+         if (res == 0)
             return true;
         }
        else
         {
           if (backward)
             {
-             if (match_tail (s, *accepts, 0))
+             if (match_tail (s, *accepts, opt.ignore_case))
                 return true;
             }
           else
             {
-             if (!strcmp (s, *accepts))
+             int cmp = opt.ignore_case
+               ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
+             if (cmp == 0)
                 return true;
             }
         }
@@ -1191,11 +1221,11 @@ get_grouping_data (const char **sep, const char **grouping)
              grouping, which we still want to use it for legibility.
              In those locales set the sep char to ',', unless that
              character is used for decimal point, in which case set it
-            to " ".  */
+            to ".".  */
           if (*lconv->decimal_point != ',')
             cached_sep = ",";
           else
-           cached_sep = " ";
+           cached_sep = ".";
           cached_grouping = "\x03";
         }
        initialized = true;
@@ -1317,10 +1347,7 @@ human_readable (HR_NUMTYPE n)
          *this* power.  */
        if ((n / 1024) < 1024 || i == countof (powers) - 1)
         {
-         /* Must cast to long first because MS VC can't directly cast
-            __int64 to double.  (This is safe because N is known to
-            be < 1024^2, so always fits into long.)  */
-         double val = (double) (long) n / 1024.0;
+         double val = n / 1024.0;
           /* Print values smaller than 10 with one decimal digits, and
              others without any decimals.  */
           snprintf (buf, sizeof (buf), "%.*f%c",
@@ -1480,6 +1507,7 @@ number_to_string (char *buffer, wgint number)
  
  #undef PR
  #undef W
+#undef SPRINTF_WGINT
  #undef DIGITS_1
  #undef DIGITS_2
  #undef DIGITS_3
@@ -1573,68 +1601,74 @@ determine_screen_width (void)
    return 0;
  #endif /* neither TIOCGWINSZ nor WINDOWS */
  }
+\f
+/* Whether the rnd system (either rand or [dl]rand48) has been
+   seeded.  */
+static int rnd_seeded;
  
  /* Return a random number between 0 and MAX-1, inclusive.
  
-   If MAX is greater than the value of RAND_MAX+1 on the system, the
-   returned value will be in the range [0, RAND_MAX].  This may be
-   fixed in a future release.
-
+   If the system does not support lrand48 and MAX is greater than the
+   value of RAND_MAX+1 on the system, the returned value will be in
+   the range [0, RAND_MAX].  This may be fixed in a future release.
     The random number generator is seeded automatically the first time
     it is called.
  
-   This uses rand() for portability.  It has been suggested that
-   random() offers better randomness, but this is not required for
-   Wget, so I chose to go for simplicity and use rand
-   unconditionally.
-
-   DO NOT use this for cryptographic purposes.  It is only meant to be
-   used in situations where quality of the random numbers returned
-   doesn't really matter.  */
+   This uses lrand48 where available, rand elsewhere.  DO NOT use it
+   for cryptography.  It is only meant to be used in situations where
+   quality of the random numbers returned doesn't really matter.  */
  
  int
  random_number (int max)
  {
-  static int seeded;
+#ifdef HAVE_DRAND48
+  if (!rnd_seeded)
+    {
+      srand48 ((long) time (NULL) ^ (long) getpid ());
+      rnd_seeded = 1;
+    }
+  return lrand48 () % max;
+#else  /* not HAVE_DRAND48 */
+
    double bounded;
    int rnd;
-
-  if (!seeded)
+  if (!rnd_seeded)
      {
-      srand (time (NULL));
-      seeded = 1;
+      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
+      rnd_seeded = 1;
      }
    rnd = rand ();
  
-  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
-     and enforce that assumption by masking other bits.  */
-#ifndef RAND_MAX
-# define RAND_MAX 32767
-  rnd &= RAND_MAX;
-#endif
+  /* Like rand() % max, but uses the high-order bits for better
+     randomness on architectures where rand() is implemented using a
+     simple congruential generator.  */
  
-  /* This is equivalent to rand() % max, but uses the high-order bits
-     for better randomness on architecture where rand() is implemented
-     using a simple congruential generator.  */
+  bounded = (double) max * rnd / (RAND_MAX + 1.0);
+  return (int) bounded;
  
-  bounded = (double)max * rnd / (RAND_MAX + 1.0);
-  return (int)bounded;
+#endif /* not HAVE_DRAND48 */
  }
  
  /* Return a random uniformly distributed floating point number in the
-   [0, 1) range.  The precision of returned numbers is 9 digits.
-
-   Modify this to use erand48() where available!  */
+   [0, 1) range.  Uses drand48 where available, and a really lame
+   kludge elsewhere.  */
  
  double
  random_float (void)
  {
-  /* We can't rely on any specific value of RAND_MAX, but I'm pretty
-     sure it's greater than 1000.  */
-  int rnd1 = random_number (1000);
-  int rnd2 = random_number (1000);
-  int rnd3 = random_number (1000);
-  return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
+#ifdef HAVE_DRAND48
+  if (!rnd_seeded)
+    {
+      srand48 ((long) time (NULL) ^ (long) getpid ());
+      rnd_seeded = 1;
+    }
+  return drand48 ();
+#else  /* not HAVE_DRAND48 */
+  return (  random_number (10000) / 10000.0
+         + random_number (10000) / (10000.0 * 10000.0)
+         + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
+         + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
+#endif /* not HAVE_DRAND48 */
  }
  \f
  /* Implementation of run_with_timeout, a generic timeout-forcing
@@ -1894,19 +1928,17 @@ base64_encode (const char *str, int length, char *b64store)
    return p - b64store;
  }
  
-#define IS_ASCII(c) (((c) & 0x80) == 0)
-#define IS_BASE64(c) ((IS_ASCII (c) && base64_char_to_value[c] >= 0) || c == '=')
+/* Store in C the next non-whitespace character from the string, or \0
+   when end of string is reached.  */
+#define NEXT_CHAR(c, p) do {                   \
+  c = (unsigned char) *p++;                    \
+} while (ISSPACE (c))
  
-/* Get next character from the string, except that non-base64
-   characters are ignored, as mandated by rfc2045.  */
-#define NEXT_BASE64_CHAR(c, p) do {                    \
-  c = *p++;                                            \
-} while (c != '\0' && !IS_BASE64 (c))
+#define IS_ASCII(c) (((c) & 0x80) == 0)
  
-/* Decode data from BASE64 (assumed to be encoded as base64) into
-   memory pointed to by TO.  TO should be large enough to accomodate
-   the decoded data, which is guaranteed to be less than
-   strlen(base64).
+/* Decode data from BASE64 (pointer to \0-terminated text) into memory
+   pointed to by TO.  TO should be large enough to accommodate the
+   decoded data, which is guaranteed to be less than strlen(base64).
  
     Since TO is assumed to contain binary data, it is not
     NUL-terminated.  The function returns the length of the data
@@ -1918,7 +1950,7 @@ base64_decode (const char *base64, char *to)
  {
    /* Table of base64 values for first 128 characters.  Note that this
       assumes ASCII (but so does Wget in other places).  */
-  static short base64_char_to_value[128] =
+  static signed char base64_char_to_value[128] =
      {
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*   0-  9 */
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*  10- 19 */
@@ -1934,6 +1966,8 @@ base64_decode (const char *base64, char *to)
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48, /* 110-119 */
        49,  50,  51,  -1,  -1,  -1,  -1,  -1            /* 120-127 */
      };
+#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
+#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
  
    const char *p = base64;
    char *q = to;
@@ -1944,30 +1978,32 @@ base64_decode (const char *base64, char *to)
        unsigned long value;
  
        /* Process first byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      NEXT_CHAR (c, p);
        if (!c)
         break;
-      if (c == '=')
-       return -1;              /* illegal '=' while decoding base64 */
-      value = base64_char_to_value[c] << 18;
+      if (c == '=' || !IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
+      value = BASE64_CHAR_TO_VALUE (c) << 18;
  
-      /* Process scond byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      /* Process second byte of a quadruplet.  */
+      NEXT_CHAR (c, p);
        if (!c)
         return -1;              /* premature EOF while decoding base64 */
-      if (c == '=')
-       return -1;              /* illegal `=' while decoding base64 */
-      value |= base64_char_to_value[c] << 12;
+      if (c == '=' || !IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
+      value |= BASE64_CHAR_TO_VALUE (c) << 12;
        *q++ = value >> 16;
  
        /* Process third byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      NEXT_CHAR (c, p);
        if (!c)
         return -1;              /* premature EOF while decoding base64 */
+      if (!IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
  
        if (c == '=')
         {
-         NEXT_BASE64_CHAR (c, p);
+         NEXT_CHAR (c, p);
           if (!c)
             return -1;          /* premature EOF while decoding base64 */
           if (c != '=')
@@ -1975,26 +2011,29 @@ base64_decode (const char *base64, char *to)
           continue;
         }
  
-      value |= base64_char_to_value[c] << 6;
+      value |= BASE64_CHAR_TO_VALUE (c) << 6;
        *q++ = 0xff & value >> 8;
  
        /* Process fourth byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      NEXT_CHAR (c, p);
        if (!c)
         return -1;              /* premature EOF while decoding base64 */
        if (c == '=')
         continue;
+      if (!IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
  
-      value |= base64_char_to_value[c];
+      value |= BASE64_CHAR_TO_VALUE (c);
        *q++ = 0xff & value;
      }
+#undef IS_BASE64
+#undef BASE64_CHAR_TO_VALUE
  
    return q - to;
  }
  
  #undef IS_ASCII
-#undef IS_BASE64
-#undef NEXT_BASE64_CHAR
+#undef NEXT_CHAR
  \f
  /* Simple merge sort for use by stable_sort.  Implementation courtesy
     Zeljko Vrba with additional debugging by Nenad Barbutov.  */
@@ -2041,3 +2080,38 @@ stable_sort (void *base, size_t nmemb, size_t size,
        mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
      }
  }
+\f
+/* Print a decimal number.  If its magnitude is 9.95 or larger, the
+   number is rounded to an integer.  From 0.95 up it is printed with
+   one fractional digit, and below that with one significant digit and
+   no more than three fractional digits.  For example, 0.1 is printed
+   as "0.1", 0.035 as "0.04", 0.0091 as "0.009", and 0.0003 as "0".
+
+   This is useful for displaying durations: long-running downloads are
+   shown without the fractional part, and short ones still retain one
+   significant digit.  The result lives in a static buffer that the
+   next call overwrites.  */
+
+const char *
+print_decimal (double number)
+{
+  static char buf[32];
+  double n = number >= 0 ? number : -number;
+
+  if (n >= 9.95)
+    /* Cut off at 9.95 because the below %.1f would round 9.96 to
+       "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
+    snprintf (buf, sizeof buf, "%.0f", number);
+  else if (n >= 0.95)
+    snprintf (buf, sizeof buf, "%.1f", number);
+  else if (n >= 0.001)
+    snprintf (buf, sizeof buf, "%.1g", number);
+  else if (n >= 0.0005)
+    /* round [0.0005, 0.001) to 0.001 */
+    snprintf (buf, sizeof buf, "%.3f", number);
+  else
+    /* print numbers close to 0 as 0, not 0.000 */
+    strcpy (buf, "0");
+
+  return buf;
+}