[svn] Merge of fix for bugs 20341 and 20410.

[wget] / src / utils.c
diff --git a/src/utils.c b/src/utils.c

index 94dc989eed8add3421d028e9f564fb5fccf77a18..90b50043c59772253924e49fce99b115f8dbe092 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,11 +1,11 @@
  /* Various utility functions.
  /* Various utility functions.
-   Copyright (C) 1996-2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2006 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
  GNU Wget is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  
  This file is part of GNU Wget.
  
  GNU Wget is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
  (at your option) any later version.
  
  GNU Wget is distributed in the hope that it will be useful,
  (at your option) any later version.
  
  GNU Wget is distributed in the hope that it will be useful,
@@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  
  In addition, as a special exception, the Free Software Foundation
  gives permission to link the code of its release of Wget with the
  
  In addition, as a special exception, the Free Software Foundation
  gives permission to link the code of its release of Wget with the
@@ -81,6 +80,10 @@ so, delete this exception statement from your version.  */
  #include "utils.h"
  #include "hash.h"
  
  #include "utils.h"
  #include "hash.h"
  
+#ifdef TESTING
+#include "test.h"
+#endif 
+
  /* Utility function: like xstrdup(), but also lowercases S.  */
  
  char *
  /* Utility function: like xstrdup(), but also lowercases S.  */
  
  char *
@@ -155,7 +158,7 @@ sepstring (const char *s)
  char *
  aprintf (const char *fmt, ...)
  {
  char *
  aprintf (const char *fmt, ...)
  {
-#ifdef HAVE_VASPRINTF
+#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
    /* Use vasprintf. */
    int ret;
    va_list args;
    /* Use vasprintf. */
    int ret;
    va_list args;
@@ -249,51 +252,38 @@ concat_strings (const char *str0, ...)
    return ret;
  }
  \f
    return ret;
  }
  \f
+/* Format the provided time according to the specified format.  The
+   format is a string with format elements supported by strftime.  */
+
+static char *
+fmttime (time_t t, const char *fmt)
+{
+  static char output[32];
+  struct tm *tm = localtime(&t);
+  if (!tm)
+    abort ();
+  if (!strftime(output, sizeof(output), fmt, tm))
+    abort ();
+  return output;
+}
+
  /* Return pointer to a static char[] buffer in which zero-terminated
     string-representation of TM (in form hh:mm:ss) is printed.
  
     If TM is NULL, the current time will be used.  */
  
  char *
  /* Return pointer to a static char[] buffer in which zero-terminated
     string-representation of TM (in form hh:mm:ss) is printed.
  
     If TM is NULL, the current time will be used.  */
  
  char *
-time_str (time_t *tm)
+time_str (time_t t)
  {
  {
-  static char output[15];
-  struct tm *ptm;
-  time_t secs = tm ? *tm : time (NULL);
-
-  if (secs == -1)
-    {
-      /* In case of error, return the empty string.  Maybe we should
-        just abort if this happens?  */
-      *output = '\0';
-      return output;
-    }
-  ptm = localtime (&secs);
-  sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
-  return output;
+  return fmttime(t, "%H:%M:%S");
  }
  
  /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
  
  char *
  }
  
  /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
  
  char *
-datetime_str (time_t *tm)
+datetime_str (time_t t)
  {
  {
-  static char output[20];      /* "YYYY-MM-DD hh:mm:ss" + \0 */
-  struct tm *ptm;
-  time_t secs = tm ? *tm : time (NULL);
-
-  if (secs == -1)
-    {
-      /* In case of error, return the empty string.  Maybe we should
-        just abort if this happens?  */
-      *output = '\0';
-      return output;
-    }
-  ptm = localtime (&secs);
-  sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
-          ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
-          ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
-  return output;
+  return fmttime(t, "%Y-%m-%d %H:%M:%S");
  }
  \f
  /* The Windows versions of the following two functions are defined in
  }
  \f
  /* The Windows versions of the following two functions are defined in
@@ -677,46 +667,49 @@ acceptable (const char *s)
    return true;
  }
  
    return true;
  }
  
-/* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
-   `/something', frontcmp() will return true only if S2 begins with
-   `/something'.  */
+/* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
+   will return true if and only if D2 begins with `/something/' or is exactly
+   `/something'.  */
  bool
  bool
-frontcmp (const char *s1, const char *s2)
+subdir_p (const char *d1, const char *d2)
  {
    if (!opt.ignore_case)
  {
    if (!opt.ignore_case)
-    for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2)
+    for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
        ;
    else
        ;
    else
-    for (; *s1 && *s2 && (TOLOWER (*s1) == TOLOWER (*s2)); ++s1, ++s2)
+    for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
        ;
        ;
-  return *s1 == '\0';
+  
+  return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
  }
  
  }
  
-/* Iterate through STRLIST, and return the first element that matches
-   S, through wildcards or front comparison (as appropriate).  */
-static char *
-proclist (char **strlist, const char *s)
+/* Iterate through DIRLIST (which must be NULL-terminated), and return true
+   if any element matches DIR, through wildcards or subdirectory comparison
+   (as appropriate); return false otherwise.  */
+static bool
+dir_matches_p (char **dirlist, const char *dir)
  {
    char **x;
    int (*matcher) (const char *, const char *, int)
      = opt.ignore_case ? fnmatch_nocase : fnmatch;
  
  {
    char **x;
    int (*matcher) (const char *, const char *, int)
      = opt.ignore_case ? fnmatch_nocase : fnmatch;
  
-  for (x = strlist; *x; x++)
+  for (x = dirlist; *x; x++)
      {
        /* Remove leading '/' */
        char *p = *x + (**x == '/');
        if (has_wildcards_p (p))
         {
      {
        /* Remove leading '/' */
        char *p = *x + (**x == '/');
        if (has_wildcards_p (p))
         {
-         if (matcher (p, s, FNM_PATHNAME) == 0)
+         if (matcher (p, dir, FNM_PATHNAME) == 0)
             break;
         }
        else
         {
             break;
         }
        else
         {
-         if (frontcmp (p, s))
+         if (subdir_p (p, dir))
             break;
         }
      }
             break;
         }
      }
-  return *x;
+      
+  return *x ? true : false;
  }
  
  /* Returns whether DIRECTORY is acceptable for download, wrt the
  }
  
  /* Returns whether DIRECTORY is acceptable for download, wrt the
@@ -733,12 +726,12 @@ accdir (const char *directory)
      ++directory;
    if (opt.includes)
      {
      ++directory;
    if (opt.includes)
      {
-      if (!proclist (opt.includes, directory))
+      if (!dir_matches_p (opt.includes, directory))
         return false;
      }
    if (opt.excludes)
      {
         return false;
      }
    if (opt.excludes)
      {
-      if (proclist (opt.excludes, directory))
+      if (dir_matches_p (opt.excludes, directory))
         return false;
      }
    return true;
         return false;
      }
    return true;
@@ -1175,6 +1168,10 @@ void string_set_to_array (struct hash_table *ht, char **array)
      *array++ = iter.key;
  }
  
      *array++ = iter.key;
  }
  
+/* Free the string set.  This frees both the storage allocated for
+   keys and the actual hash table.  (hash_table_destroy would only
+   destroy the hash table.)  */
+
  void
  string_set_free (struct hash_table *ht)
  {
  void
  string_set_free (struct hash_table *ht)
  {
@@ -1184,7 +1181,7 @@ string_set_free (struct hash_table *ht)
    hash_table_destroy (ht);
  }
  
    hash_table_destroy (ht);
  }
  
-/* Utility function: simply call free() on all keys and values of HT.  */
+/* Utility function: simply call xfree() on all keys and values of HT.  */
  
  void
  free_keys_and_values (struct hash_table *ht)
  
  void
  free_keys_and_values (struct hash_table *ht)
@@ -1405,20 +1402,6 @@ numdigit (wgint number)
  #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
  #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
  
  #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
  #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
  
-/* SPRINTF_WGINT is used by number_to_string to handle pathological
-   cases and to portably support strange sizes of wgint.  Ideally this
-   would just use "%j" and intmax_t, but many systems don't support
-   it, so it's used only if nothing else works.  */
-#if SIZEOF_LONG >= SIZEOF_WGINT
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n))
-#elif SIZEOF_LONG_LONG >= SIZEOF_WGINT
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n))
-#elif defined(WINDOWS)
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n))
-#else
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n))
-#endif
-
  /* Shorthand for casting to wgint. */
  #define W wgint
  
  /* Shorthand for casting to wgint. */
  #define W wgint
  
@@ -1428,15 +1411,15 @@ numdigit (wgint number)
  
     The speedup may make a difference in programs that frequently
     convert numbers to strings.  Some implementations of sprintf,
  
     The speedup may make a difference in programs that frequently
     convert numbers to strings.  Some implementations of sprintf,
-   particularly the one in GNU libc, have been known to be extremely
-   slow when converting integers to strings.
+   particularly the one in some versions of GNU libc, have been known
+   to be quite slow when converting integers to strings.
  
     Return the pointer to the location where the terminating zero was
     printed.  (Equivalent to calling buffer+strlen(buffer) after the
     function is done.)
  
  
     Return the pointer to the location where the terminating zero was
     printed.  (Equivalent to calling buffer+strlen(buffer) after the
     function is done.)
  
-   BUFFER should be big enough to accept as many bytes as you expect
-   the number to take up.  On machines with 64-bit longs the maximum
+   BUFFER should be large enough to accept as many bytes as you expect
+   the number to take up.  On machines with 64-bit wgints the maximum
     needed size is 24 bytes.  That includes the digits needed for the
     largest 64-bit number, the `-' sign in case it's negative, and the
     terminating '\0'.  */
     needed size is 24 bytes.  That includes the digits needed for the
     largest 64-bit number, the `-' sign in case it's negative, and the
     terminating '\0'.  */
@@ -1447,21 +1430,29 @@ number_to_string (char *buffer, wgint number)
    char *p = buffer;
    wgint n = number;
  
    char *p = buffer;
    wgint n = number;
  
+  int last_digit_char = 0;
+
  #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
  #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
-  /* We are running in a strange or misconfigured environment.  Let
-     sprintf cope with it.  */
-  SPRINTF_WGINT (buffer, n);
-  p += strlen (buffer);
+  /* We are running in a very strange environment.  Leave the correct
+     printing to sprintf.  */
+  p += sprintf (buf, "%j", (intmax_t) (n));
  #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
  
    if (n < 0)
      {
        if (n < -WGINT_MAX)
         {
  #else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
  
    if (n < 0)
      {
        if (n < -WGINT_MAX)
         {
-         /* -n would overflow.  Have sprintf deal with this.  */
-         SPRINTF_WGINT (buffer, n);
-         p += strlen (buffer);
-         return p;
+         /* n = -n would overflow because -n would evaluate to a
+             wgint value larger than WGINT_MAX.  Need to make n
+             smaller and handle the last digit separately.  */
+          int last_digit = n % 10;
+          /* The sign of n%10 is implementation-defined. */
+          if (last_digit < 0)
+            last_digit_char = '0' - last_digit;
+          else
+            last_digit_char = '0' + last_digit;
+          /* After n is made smaller, -n will not overflow. */
+          n /= 10;
         }
  
        *p++ = '-';
         }
  
        *p++ = '-';
@@ -1501,6 +1492,9 @@ number_to_string (char *buffer, wgint number)
    else                                   DIGITS_19 (1000000000*(W)1000000000);
  #endif
  
    else                                   DIGITS_19 (1000000000*(W)1000000000);
  #endif
  
+  if (last_digit_char)
+    *p++ = last_digit_char;
+
    *p = '\0';
  #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
  
    *p = '\0';
  #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
  
@@ -1881,53 +1875,61 @@ xsleep (double seconds)
  
  #endif /* not WINDOWS */
  
  
  #endif /* not WINDOWS */
  
-/* Encode the string STR of length LENGTH to base64 format and place it
-   to B64STORE.  The output will be \0-terminated, and must point to a
-   writable buffer of at least 1+BASE64_LENGTH(length) bytes.  It
-   returns the length of the resulting base64 data, not counting the
-   terminating zero.
+/* Encode the octets in DATA of length LENGTH to base64 format,
+   storing the result to DEST.  The output will be zero-terminated,
+   and DEST must point to a writable buffer of at least
+   1+BASE64_LENGTH(length) bytes.  The function returns the length of
+   the resulting base64 data, not counting the terminating zero.
  
  
-   This implementation will not emit newlines after 76 characters of
+   This implementation does not emit newlines after 76 characters of
     base64 data.  */
  
  int
     base64 data.  */
  
  int
-base64_encode (const char *str, int length, char *b64store)
+base64_encode (const void *data, int length, char *dest)
  {
    /* Conversion table.  */
  {
    /* Conversion table.  */
-  static char tbl[64] = {
-    'A','B','C','D','E','F','G','H',
-    'I','J','K','L','M','N','O','P',
-    'Q','R','S','T','U','V','W','X',
-    'Y','Z','a','b','c','d','e','f',
-    'g','h','i','j','k','l','m','n',
-    'o','p','q','r','s','t','u','v',
-    'w','x','y','z','0','1','2','3',
-    '4','5','6','7','8','9','+','/'
+  static const char tbl[64] = {
+    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
+    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
+    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
+    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
    };
    };
-  int i;
-  const unsigned char *s = (const unsigned char *) str;
-  char *p = b64store;
+  /* Access bytes in DATA as unsigned char, otherwise the shifts below
+     don't work for data with MSB set. */
+  const unsigned char *s = data;
+  /* Theoretical ANSI violation when length < 3. */
+  const unsigned char *end = (const unsigned char *) data + length - 2;
+  char *p = dest;
  
    /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  
    /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
-  for (i = 0; i < length; i += 3)
+  for (; s < end; s += 3)
      {
        *p++ = tbl[s[0] >> 2];
        *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
        *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
        *p++ = tbl[s[2] & 0x3f];
      {
        *p++ = tbl[s[0] >> 2];
        *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
        *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
        *p++ = tbl[s[2] & 0x3f];
-      s += 3;
      }
  
    /* Pad the result if necessary...  */
      }
  
    /* Pad the result if necessary...  */
-  if (i == length + 1)
-    *(p - 1) = '=';
-  else if (i == length + 2)
-    *(p - 1) = *(p - 2) = '=';
-
+  switch (length % 3)
+    {
+    case 1:
+      *p++ = tbl[s[0] >> 2];
+      *p++ = tbl[(s[0] & 3) << 4];
+      *p++ = '=';
+      *p++ = '=';
+      break;
+    case 2:
+      *p++ = tbl[s[0] >> 2];
+      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
+      *p++ = tbl[((s[1] & 0xf) << 2)];
+      *p++ = '=';
+      break;
+    }
    /* ...and zero-terminate it.  */
    *p = '\0';
  
    /* ...and zero-terminate it.  */
    *p = '\0';
  
-  return p - b64store;
+  return p - dest;
  }
  
  /* Store in C the next non-whitespace character from the string, or \0
  }
  
  /* Store in C the next non-whitespace character from the string, or \0
@@ -1938,21 +1940,24 @@ base64_encode (const char *str, int length, char *b64store)
  
  #define IS_ASCII(c) (((c) & 0x80) == 0)
  
  
  #define IS_ASCII(c) (((c) & 0x80) == 0)
  
-/* Decode data from BASE64 (pointer to \0-terminated text) into memory
-   pointed to by TO.  TO should be large enough to accomodate the
-   decoded data, which is guaranteed to be less than strlen(base64).
+/* Decode data from BASE64 (a null-terminated string) into memory
+   pointed to by DEST.  DEST is assumed to be large enough to
+   accommodate the decoded data, which is guaranteed to be no more than
+   3/4*strlen(base64).
  
  
-   Since TO is assumed to contain binary data, it is not
+   Since DEST is assumed to contain binary data, it is not
     NUL-terminated.  The function returns the length of the data
     written to TO.  -1 is returned in case of error caused by malformed
     NUL-terminated.  The function returns the length of the data
     written to TO.  -1 is returned in case of error caused by malformed
-   base64 input.  */
+   base64 input.
+
+   This function originates from Free Recode.  */
  
  int
  
  int
-base64_decode (const char *base64, char *to)
+base64_decode (const char *base64, void *dest)
  {
    /* Table of base64 values for first 128 characters.  Note that this
       assumes ASCII (but so does Wget in other places).  */
  {
    /* Table of base64 values for first 128 characters.  Note that this
       assumes ASCII (but so does Wget in other places).  */
-  static signed char base64_char_to_value[128] =
+  static const signed char base64_char_to_value[128] =
      {
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*   0-  9 */
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*  10- 19 */
      {
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*   0-  9 */
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /*  10- 19 */
@@ -1972,7 +1977,7 @@ base64_decode (const char *base64, char *to)
  #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
  
    const char *p = base64;
  #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
  
    const char *p = base64;
-  char *q = to;
+  char *q = dest;
  
    while (1)
      {
  
    while (1)
      {
@@ -2031,7 +2036,7 @@ base64_decode (const char *base64, char *to)
  #undef IS_BASE64
  #undef BASE64_CHAR_TO_VALUE
  
  #undef IS_BASE64
  #undef BASE64_CHAR_TO_VALUE
  
-  return q - to;
+  return q - (char *) dest;
  }
  
  #undef IS_ASCII
  }
  
  #undef IS_ASCII
@@ -2117,3 +2122,60 @@ print_decimal (double number)
  
    return buf;
  }
  
    return buf;
  }
+
+#ifdef TESTING
+
+const char *
+test_subdir_p()
+{
+  int i;
+  struct {
+    char *d1;
+    char *d2;
+    bool result;
+  } test_array[] = {
+    { "/somedir", "/somedir", true },
+    { "/somedir", "/somedir/d2", true },
+    { "/somedir/d1", "/somedir", false },
+  };
+  
+  for (i = 0; i < countof(test_array); ++i) 
+    {
+      bool res = subdir_p (test_array[i].d1, test_array[i].d2);
+
+      mu_assert ("test_subdir_p: wrong result", 
+                 res == test_array[i].result);
+    }
+
+  return NULL;
+}
+
+const char *
+test_dir_matches_p()
+{
+  int i;
+  struct {
+    char *dirlist[3];
+    char *dir;
+    bool result;
+  } test_array[] = {
+    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
+    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
+    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
+    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
+    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
+  };
+  
+  for (i = 0; i < countof(test_array); ++i) 
+    {
+      bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
+      
+      mu_assert ("test_dir_matches_p: wrong result", 
+                 res == test_array[i].result);
+    }
+
+  return NULL;
+}
+
+#endif /* TESTING */
+