[svn] Simplify iteration over hash table entries.

[wget] / src / utils.c
diff --git a/src/utils.c b/src/utils.c

index 43c13b841d83772d7601ef56b0f3f80ff976a54b..94dc989eed8add3421d028e9f564fb5fccf77a18 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -42,9 +42,6 @@ so, delete this exception statement from your version.  */
  #ifdef HAVE_MMAP
  # include <sys/mman.h>
  #endif
-#ifdef HAVE_PWD_H
-# include <pwd.h>
-#endif
  #ifdef HAVE_UTIME_H
  # include <utime.h>
  #endif
@@ -52,9 +49,6 @@ so, delete this exception statement from your version.  */
  # include <sys/utime.h>
  #endif
  #include <errno.h>
-#ifdef NeXT
-# include <libc.h>             /* for access() */
-#endif
  #include <fcntl.h>
  #include <assert.h>
  #include <stdarg.h>
@@ -68,11 +62,9 @@ so, delete this exception statement from your version.  */
  # include <termios.h>
  #endif
  
-/* Needed for run_with_timeout. */
+/* Needed for Unix version of run_with_timeout. */
  #include <signal.h>
-#ifdef HAVE_SETJMP_H
-# include <setjmp.h>
-#endif
+#include <setjmp.h>
  
  #ifndef HAVE_SIGSETJMP
  /* If sigsetjmp is a macro, configure won't pick it up. */
@@ -81,8 +73,7 @@ so, delete this exception statement from your version.  */
  # endif
  #endif
  
-#undef USE_SIGNAL_TIMEOUT
-#if defined(HAVE_SIGSETJMP) || defined(HAVE_SIGBLOCK)
+#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
  # define USE_SIGNAL_TIMEOUT
  #endif
  
@@ -150,28 +141,48 @@ sepstring (const char *s)
    return res;
  }
  \f
-/* Like sprintf, but allocates a string of sufficient size with malloc
-   and returns it.  GNU libc has a similar function named asprintf,
-   which requires the pointer to the string to be passed.  */
+/* Like sprintf, but prints into a string of sufficient size freshly
+   allocated with malloc, which is returned.  If unable to print due
+   to invalid format, returns NULL.  Inability to allocate needed
+   memory results in abort, as with xmalloc.  This is in spirit
+   similar to the GNU/BSD extension asprintf, but somewhat easier to
+   use.
+
+   Internally the function either calls vasprintf or loops around
+   vsnprintf until the correct size is found.  Since Wget also ships a
+   fallback implementation of vsnprintf, this should be portable.  */
  
  char *
  aprintf (const char *fmt, ...)
  {
-  /* This function is implemented using vsnprintf, which we provide
-     for the systems that don't have it.  Therefore, it should be 100%
-     portable.  */
+#ifdef HAVE_VASPRINTF
+  /* Use vasprintf. */
+  int ret;
+  va_list args;
+  char *str;
+  va_start (args, fmt);
+  ret = vasprintf (&str, fmt, args);
+  va_end (args);
+  if (ret < 0 && errno == ENOMEM)
+    abort ();                  /* for consistency with xmalloc/xrealloc */
+  else if (ret < 0)
+    return NULL;
+  return str;
+#else  /* not HAVE_VASPRINTF */
  
+  /* vasprintf is unavailable.  snprintf into a small buffer and
+     resize it as necessary. */
    int size = 32;
    char *str = xmalloc (size);
  
+  /* #### This code will infloop and eventually abort in xrealloc if
+     passed a FMT that causes snprintf to consistently return -1.  */
+
    while (1)
      {
        int n;
        va_list args;
  
-      /* See log_vprintf_internal for explanation why it's OK to rely
-        on the return value of vsnprintf.  */
-
        va_start (args, fmt);
        n = vsnprintf (str, size, fmt, args);
        va_end (args);
@@ -187,6 +198,7 @@ aprintf (const char *fmt, ...)
         size <<= 1;             /* twice the old size */
        str = xrealloc (str, size);
      }
+#endif /* not HAVE_VASPRINTF */
  }
  
  /* Concatenate the NULL-terminated list of string arguments into
@@ -596,7 +608,7 @@ make_directory (const char *directory)
     file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
     file_merge("foo", "bar")       => "bar"
  
-   In other words, it's a simpler and gentler version of uri_merge_1.  */
+   In other words, it's a simpler and gentler version of uri_merge.  */
  
  char *
  file_merge (const char *base, const char *file)
@@ -615,6 +627,30 @@ file_merge (const char *base, const char *file)
    return result;
  }
  \f
+/* Like fnmatch, but performs a case-insensitive match.  */
+
+int
+fnmatch_nocase (const char *pattern, const char *string, int flags)
+{
+#ifdef FNM_CASEFOLD
+  /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
+     also present on *BSD platforms, and possibly elsewhere.  */
+  return fnmatch (pattern, string, flags | FNM_CASEFOLD);
+#else
+  /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
+  char *patcopy = (char *) alloca (strlen (pattern) + 1);
+  char *strcopy = (char *) alloca (strlen (string) + 1);
+  char *p;
+  for (p = patcopy; *pattern; pattern++, p++)
+    *p = TOLOWER (*pattern);
+  *p = '\0';
+  for (p = strcopy; *string; string++, p++)
+    *p = TOLOWER (*string);
+  *p = '\0';
+  return fnmatch (patcopy, strcopy, flags);
+#endif
+}
+
  static bool in_acclist (const char *const *, const char *, bool);
  
  /* Determine whether a file is acceptable to be followed, according to
@@ -642,28 +678,36 @@ acceptable (const char *s)
  }
  
  /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
-   `/something', frontcmp() will return 1 only if S2 begins with
-   `/something'.  Otherwise, 0 is returned.  */
+   `/something', frontcmp() will return true only if S2 begins with
+   `/something'.  */
  bool
  frontcmp (const char *s1, const char *s2)
  {
-  for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
+  if (!opt.ignore_case)
+    for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2)
+      ;
+  else
+    for (; *s1 && *s2 && (TOLOWER (*s1) == TOLOWER (*s2)); ++s1, ++s2)
+      ;
    return *s1 == '\0';
  }
  
  /* Iterate through STRLIST, and return the first element that matches
     S, through wildcards or front comparison (as appropriate).  */
  static char *
-proclist (char **strlist, const char *s, enum accd flags)
+proclist (char **strlist, const char *s)
  {
    char **x;
+  int (*matcher) (const char *, const char *, int)
+    = opt.ignore_case ? fnmatch_nocase : fnmatch;
+
    for (x = strlist; *x; x++)
      {
-      /* Remove leading '/' if ALLABS */
-      char *p = *x + ((flags & ALLABS) && (**x == '/'));
+      /* Remove leading '/' */
+      char *p = *x + (**x == '/');
        if (has_wildcards_p (p))
         {
-         if (fnmatch (p, s, FNM_PATHNAME) == 0)
+         if (matcher (p, s, FNM_PATHNAME) == 0)
             break;
         }
        else
@@ -678,22 +722,23 @@ proclist (char **strlist, const char *s, enum accd flags)
  /* Returns whether DIRECTORY is acceptable for download, wrt the
     include/exclude lists.
  
-   If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
-   and absolute paths may be freely intermixed.  */
+   The leading `/' is ignored in paths; relative and absolute paths
+   may be freely intermixed.  */
+
  bool
-accdir (const char *directory, enum accd flags)
+accdir (const char *directory)
  {
    /* Remove starting '/'.  */
-  if (flags & ALLABS && *directory == '/')
+  if (*directory == '/')
      ++directory;
    if (opt.includes)
      {
-      if (!proclist (opt.includes, directory, flags))
+      if (!proclist (opt.includes, directory))
         return false;
      }
    if (opt.excludes)
      {
-      if (proclist (opt.excludes, directory, flags))
+      if (proclist (opt.excludes, directory))
         return false;
      }
    return true;
@@ -748,21 +793,24 @@ in_acclist (const char *const *accepts, const char *s, bool backward)
      {
        if (has_wildcards_p (*accepts))
         {
-         /* fnmatch returns 0 if the pattern *does* match the
-            string.  */
-         if (fnmatch (*accepts, s, 0) == 0)
+         int res = opt.ignore_case
+           ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
+         /* fnmatch returns 0 if the pattern *does* match the string.  */
+         if (res == 0)
             return true;
         }
        else
         {
           if (backward)
             {
-             if (match_tail (s, *accepts, 0))
+             if (match_tail (s, *accepts, opt.ignore_case))
                 return true;
             }
           else
             {
-             if (!strcmp (s, *accepts))
+             int cmp = opt.ignore_case
+               ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
+             if (cmp == 0)
                 return true;
             }
         }
@@ -1052,9 +1100,11 @@ merge_vecs (char **v1, char **v2)
        return v1;
      }
    /* Count v1.  */
-  for (i = 0; v1[i]; i++);
+  for (i = 0; v1[i]; i++)
+    ;
    /* Count v2.  */
-  for (j = 0; v2[j]; j++);
+  for (j = 0; v2[j]; j++)
+    ;
    /* Reallocate v1.  */
    v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
    memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
@@ -1115,56 +1165,41 @@ string_set_contains (struct hash_table *ht, const char *s)
    return hash_table_contains (ht, s);
  }
  
-static int
-string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
-{
-  char ***arrayptr = (char ***) arg;
-  *(*arrayptr)++ = (char *) key;
-  return 0;
-}
-
  /* Convert the specified string set to array.  ARRAY should be large
     enough to hold hash_table_count(ht) char pointers.  */
  
  void string_set_to_array (struct hash_table *ht, char **array)
  {
-  hash_table_map (ht, string_set_to_array_mapper, &array);
-}
-
-static int
-string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
-{
-  xfree (key);
-  return 0;
+  hash_table_iterator iter;
+  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
+    *array++ = iter.key;
  }
  
  void
  string_set_free (struct hash_table *ht)
  {
-  hash_table_map (ht, string_set_free_mapper, NULL);
+  hash_table_iterator iter;
+  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
+    xfree (iter.key);
    hash_table_destroy (ht);
  }
  
-static int
-free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
-{
-  xfree (key);
-  xfree (value);
-  return 0;
-}
-
-/* Another utility function: call free() on all keys and values of HT.  */
+/* Utility function: simply call free() on all keys and values of HT.  */
  
  void
  free_keys_and_values (struct hash_table *ht)
  {
-  hash_table_map (ht, free_keys_and_values_mapper, NULL);
+  hash_table_iterator iter;
+  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
+    {
+      xfree (iter.key);
+      xfree (iter.value);
+    }
  }
-
  \f
-/* Get grouping data, the separator and grouping info, by calling
-   localeconv().  The information is cached after the first call to
-   the function.
+/* Get digit grouping data for thousand separators by calling
+   localeconv().  The data includes separator string and grouping info
+   and is cached after the first call to the function.
  
     In locales that don't set a thousand separator (such as the "C"
     locale), this forces it to be ",".  We are now only showing
@@ -1208,8 +1243,8 @@ get_grouping_data (const char **sep, const char **grouping)
  
     Unfortunately, we cannot use %'d (in fact it would be %'j) to get
     the separators because it's too non-portable, and it's hard to test
-   for this feature at configure time.  Besides, it wouldn't work in
-   the "C" locale, which many Unix users still work in.  */
+   for this feature at configure time.  Besides, it wouldn't display
+   separators in the "C" locale, still used by many Unix users.  */
  
  const char *
  with_thousand_seps (wgint n)
@@ -1233,8 +1268,8 @@ with_thousand_seps (wgint n)
    atgroup = grouping;
    groupsize = *atgroup++;
  
-  /* This will overflow on WGINT_MIN, but we're not using this to
-     print negative numbers anyway.  */
+  /* This would overflow on WGINT_MIN, but printing negative numbers
+     is not an important goal of this function.  */
    if (negative)
      n = -n;
  
@@ -1276,11 +1311,10 @@ with_thousand_seps (wgint n)
     usually improves readability."
  
     This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
-   original computer-related meaning of "powers of 1024".  Powers of
-   1000 would be useless since Wget already displays sizes with
-   thousand separators.  We don't use the "*bibyte" names invented in
-   1998, and seldom used in practice.  Wikipedia's entry on kilobyte
-   discusses this in some detail.  */
+   original computer-related meaning of "powers of 1024".  We don't
+   use the "*bibyte" names invented in 1998, and seldom used in
+   practice.  Wikipedia's entry on "binary prefix" discusses this in
+   some detail.  */
  
  char *
  human_readable (HR_NUMTYPE n)
@@ -1896,19 +1930,17 @@ base64_encode (const char *str, int length, char *b64store)
    return p - b64store;
  }
  
-#define IS_ASCII(c) (((c) & 0x80) == 0)
-#define IS_BASE64(c) ((IS_ASCII (c) && base64_char_to_value[c] >= 0) || c == '=')
+/* Store in C the next non-whitespace character from the string, or \0
+   when end of string is reached.  */
+#define NEXT_CHAR(c, p) do {                   \
+  c = (unsigned char) *p++;                    \
+} while (ISSPACE (c))
  
-/* Get next character from the string, except that non-base64
-   characters are ignored, as mandated by rfc2045.  */
-#define NEXT_BASE64_CHAR(c, p) do {                    \
-  c = *p++;                                            \
-} while (c != '\0' && !IS_BASE64 (c))
+#define IS_ASCII(c) (((c) & 0x80) == 0)
  
-/* Decode data from BASE64 (assumed to be encoded as base64) into
-   memory pointed to by TO.  TO should be large enough to accomodate
-   the decoded data, which is guaranteed to be less than
-   strlen(base64).
+/* Decode data from BASE64 (pointer to \0-terminated text) into memory
+   pointed to by TO.  TO should be large enough to accommodate the
+   decoded data, which is guaranteed to be less than strlen(base64).
  
     Since TO is assumed to contain binary data, it is not
     NUL-terminated.  The function returns the length of the data
@@ -1920,7 +1952,7 @@ base64_decode (const char *base64, char *to)
  {
    /* Table of base64 values for first 128 characters.  Note that this
       assumes ASCII (but so does Wget in other places).  */
-  static short base64_char_to_value[128] =
+  static signed char base64_char_to_value[128] =
      {
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*   0-  9 */
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*  10- 19 */
@@ -1936,6 +1968,8 @@ base64_decode (const char *base64, char *to)
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48, /* 110-119 */
        49,  50,  51,  -1,  -1,  -1,  -1,  -1            /* 120-127 */
      };
+#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
+#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
  
    const char *p = base64;
    char *q = to;
@@ -1946,30 +1980,32 @@ base64_decode (const char *base64, char *to)
        unsigned long value;
  
        /* Process first byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      NEXT_CHAR (c, p);
        if (!c)
         break;
-      if (c == '=')
-       return -1;              /* illegal '=' while decoding base64 */
-      value = base64_char_to_value[c] << 18;
+      if (c == '=' || !IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
+      value = BASE64_CHAR_TO_VALUE (c) << 18;
  
-      /* Process scond byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      /* Process second byte of a quadruplet.  */
+      NEXT_CHAR (c, p);
        if (!c)
         return -1;              /* premature EOF while decoding base64 */
-      if (c == '=')
-       return -1;              /* illegal `=' while decoding base64 */
-      value |= base64_char_to_value[c] << 12;
+      if (c == '=' || !IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
+      value |= BASE64_CHAR_TO_VALUE (c) << 12;
        *q++ = value >> 16;
  
        /* Process third byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      NEXT_CHAR (c, p);
        if (!c)
         return -1;              /* premature EOF while decoding base64 */
+      if (!IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
  
        if (c == '=')
         {
-         NEXT_BASE64_CHAR (c, p);
+         NEXT_CHAR (c, p);
           if (!c)
             return -1;          /* premature EOF while decoding base64 */
           if (c != '=')
@@ -1977,26 +2013,29 @@ base64_decode (const char *base64, char *to)
           continue;
         }
  
-      value |= base64_char_to_value[c] << 6;
+      value |= BASE64_CHAR_TO_VALUE (c) << 6;
        *q++ = 0xff & value >> 8;
  
        /* Process fourth byte of a quadruplet.  */
-      NEXT_BASE64_CHAR (c, p);
+      NEXT_CHAR (c, p);
        if (!c)
         return -1;              /* premature EOF while decoding base64 */
        if (c == '=')
         continue;
+      if (!IS_BASE64 (c))
+       return -1;              /* illegal char while decoding base64 */
  
-      value |= base64_char_to_value[c];
+      value |= BASE64_CHAR_TO_VALUE (c);
        *q++ = 0xff & value;
      }
+#undef IS_BASE64
+#undef BASE64_CHAR_TO_VALUE
  
    return q - to;
  }
  
  #undef IS_ASCII
-#undef IS_BASE64
-#undef NEXT_BASE64_CHAR
+#undef NEXT_CHAR
  \f
  /* Simple merge sort for use by stable_sort.  Implementation courtesy
     Zeljko Vrba with additional debugging by Nenad Barbutov.  */