/* Various utility functions.
- Copyright (C) 1996-2005 Free Software Foundation, Inc.
+ Copyright (C) 1996-2006 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
#include "utils.h"
#include "hash.h"
+#ifdef TESTING
+#include "test.h"
+#endif
+
/* Utility function: like xstrdup(), but also lowercases S. */
char *
char *
aprintf (const char *fmt, ...)
{
-#ifdef HAVE_VASPRINTF
+#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
/* Use vasprintf. */
int ret;
va_list args;
int size = 32;
char *str = xmalloc (size);
+ /* #### This code will infloop and eventually abort in xrealloc if
+ passed a FMT that causes snprintf to consistently return -1. */
+
while (1)
{
int n;
return ret;
}
\f
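+/* Format the provided time according to the specified format. The
+ format is a string with format elements supported by strftime.
+ Returns a pointer to a static 32-byte buffer that is overwritten by
+ each call; aborts if localtime fails or strftime returns 0. */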
+
+static char *
+fmttime (time_t t, const char *fmt)
+{
+ static char output[32];
+ struct tm *tm = localtime(&t);
+ if (!tm)
+ abort ();
+ if (!strftime(output, sizeof(output), fmt, tm))
+ abort ();
+ return output;
+}
+
/* Return pointer to a static char[] buffer in which zero-terminated
string-representation of TM (in form hh:mm:ss) is printed.
If TM is NULL, the current time will be used. */
char *
-time_str (time_t *tm)
+time_str (time_t t)
{
- static char output[15];
- struct tm *ptm;
- time_t secs = tm ? *tm : time (NULL);
-
- if (secs == -1)
- {
- /* In case of error, return the empty string. Maybe we should
- just abort if this happens? */
- *output = '\0';
- return output;
- }
- ptm = localtime (&secs);
- sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
- return output;
+ return fmttime(t, "%H:%M:%S");
}
/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
char *
-datetime_str (time_t *tm)
+datetime_str (time_t t)
{
- static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
- struct tm *ptm;
- time_t secs = tm ? *tm : time (NULL);
-
- if (secs == -1)
- {
- /* In case of error, return the empty string. Maybe we should
- just abort if this happens? */
- *output = '\0';
- return output;
- }
- ptm = localtime (&secs);
- sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
- ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
- ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
- return output;
+ return fmttime(t, "%Y-%m-%d %H:%M:%S");
}
\f
/* The Windows versions of the following two functions are defined in
return true;
}
-/* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
- `/something', frontcmp() will return true only if S2 begins with
- `/something'. */
+/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
+ will return true if and only if D2 begins with `/something/' or is exactly
+ `/something'. */
bool
-frontcmp (const char *s1, const char *s2)
+subdir_p (const char *d1, const char *d2)
{
if (!opt.ignore_case)
- for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2)
+ for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
;
else
- for (; *s1 && *s2 && (TOLOWER (*s1) == TOLOWER (*s2)); ++s1, ++s2)
+ for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
;
- return *s1 == '\0';
+
+ return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
}
-/* Iterate through STRLIST, and return the first element that matches
- S, through wildcards or front comparison (as appropriate). */
-static char *
-proclist (char **strlist, const char *s)
+/* Iterate through DIRLIST (which must be NULL-terminated), and return
+ true if any element matches DIR, through wildcards or subdirectory
+ comparison (as appropriate); return false otherwise. */
+static bool
+dir_matches_p (char **dirlist, const char *dir)
{
char **x;
int (*matcher) (const char *, const char *, int)
= opt.ignore_case ? fnmatch_nocase : fnmatch;
- for (x = strlist; *x; x++)
+ for (x = dirlist; *x; x++)
{
/* Remove leading '/' */
char *p = *x + (**x == '/');
if (has_wildcards_p (p))
{
- if (matcher (p, s, FNM_PATHNAME) == 0)
+ if (matcher (p, dir, FNM_PATHNAME) == 0)
break;
}
else
{
- if (frontcmp (p, s))
+ if (subdir_p (p, dir))
break;
}
}
- return *x;
+
+ return *x ? true : false;
}
/* Returns whether DIRECTORY is acceptable for download, wrt the
++directory;
if (opt.includes)
{
- if (!proclist (opt.includes, directory))
+ if (!dir_matches_p (opt.includes, directory))
return false;
}
if (opt.excludes)
{
- if (proclist (opt.excludes, directory))
+ if (dir_matches_p (opt.excludes, directory))
return false;
}
return true;
return hash_table_contains (ht, s);
}
-static int
-string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
-{
- char ***arrayptr = (char ***) arg;
- *(*arrayptr)++ = (char *) key;
- return 0;
-}
-
/* Convert the specified string set to array. ARRAY should be large
enough to hold hash_table_count(ht) char pointers. */
void string_set_to_array (struct hash_table *ht, char **array)
{
- hash_table_map (ht, string_set_to_array_mapper, &array);
+ hash_table_iterator iter;
+ for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
+ *array++ = iter.key;
}
-static int
-string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
-{
- xfree (key);
- return 0;
-}
+/* Free the string set. This frees both the storage allocated for
+ keys and the actual hash table. (hash_table_destroy would only
+ destroy the hash table.) */
void
string_set_free (struct hash_table *ht)
{
- hash_table_map (ht, string_set_free_mapper, NULL);
+ hash_table_iterator iter;
+ for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
+ xfree (iter.key);
hash_table_destroy (ht);
}
-static int
-free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
-{
- xfree (key);
- xfree (value);
- return 0;
-}
-
-/* Another utility function: call free() on all keys and values of HT. */
+/* Utility function: simply call xfree() on all keys and values of HT. */
void
free_keys_and_values (struct hash_table *ht)
{
- hash_table_map (ht, free_keys_and_values_mapper, NULL);
+ hash_table_iterator iter;
+ for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
+ {
+ xfree (iter.key);
+ xfree (iter.value);
+ }
}
-
\f
-/* Get grouping data, the separator and grouping info, by calling
- localeconv(). The information is cached after the first call to
- the function.
+/* Get digit grouping data for thousand separators by calling
+ localeconv(). The data includes separator string and grouping info
+ and is cached after the first call to the function.
In locales that don't set a thousand separator (such as the "C"
locale), this forces it to be ",". We are now only showing
Unfortunately, we cannot use %'d (in fact it would be %'j) to get
the separators because it's too non-portable, and it's hard to test
- for this feature at configure time. Besides, it wouldn't work in
- the "C" locale, which many Unix users still work in. */
+ for this feature at configure time. Besides, it wouldn't display
+ separators in the "C" locale, still used by many Unix users. */
const char *
with_thousand_seps (wgint n)
atgroup = grouping;
groupsize = *atgroup++;
- /* This will overflow on WGINT_MIN, but we're not using this to
- print negative numbers anyway. */
+ /* This would overflow on WGINT_MIN, but printing negative numbers
+ is not an important goal of this function. */
if (negative)
n = -n;
usually improves readability."
This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
- original computer-related meaning of "powers of 1024". Powers of
- 1000 would be useless since Wget already displays sizes with
- thousand separators. We don't use the "*bibyte" names invented in
- 1998, and seldom used in practice. Wikipedia's entry on kilobyte
- discusses this in some detail. */
+ original computer-related meaning of "powers of 1024". We don't
+ use the "*bibyte" names invented in 1998, and seldom used in
+ practice. Wikipedia's entry on "binary prefix" discusses this in
+ some detail. */
char *
human_readable (HR_NUMTYPE n)
#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
-/* SPRINTF_WGINT is used by number_to_string to handle pathological
- cases and to portably support strange sizes of wgint. Ideally this
- would just use "%j" and intmax_t, but many systems don't support
- it, so it's used only if nothing else works. */
-#if SIZEOF_LONG >= SIZEOF_WGINT
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n))
-#elif SIZEOF_LONG_LONG >= SIZEOF_WGINT
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n))
-#elif defined(WINDOWS)
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n))
-#else
-# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n))
-#endif
-
/* Shorthand for casting to wgint. */
#define W wgint
The speedup may make a difference in programs that frequently
convert numbers to strings. Some implementations of sprintf,
- particularly the one in GNU libc, have been known to be extremely
- slow when converting integers to strings.
+ particularly the one in some versions of GNU libc, have been known
+ to be quite slow when converting integers to strings.
Return the pointer to the location where the terminating zero was
printed. (Equivalent to calling buffer+strlen(buffer) after the
function is done.)
- BUFFER should be big enough to accept as many bytes as you expect
- the number to take up. On machines with 64-bit longs the maximum
+ BUFFER should be large enough to accept as many bytes as you expect
+ the number to take up. On machines with 64-bit wgints the maximum
needed size is 24 bytes. That includes the digits needed for the
largest 64-bit number, the `-' sign in case it's negative, and the
terminating '\0'. */
char *p = buffer;
wgint n = number;
+ int last_digit_char = 0;
+
#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
- /* We are running in a strange or misconfigured environment. Let
- sprintf cope with it. */
- SPRINTF_WGINT (buffer, n);
- p += strlen (buffer);
+ /* We are running in a very strange environment. Leave the correct
+ printing to sprintf. */
+ p += sprintf (buf, "%j", (intmax_t) (n));
#else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
if (n < 0)
{
if (n < -WGINT_MAX)
{
- /* -n would overflow. Have sprintf deal with this. */
- SPRINTF_WGINT (buffer, n);
- p += strlen (buffer);
- return p;
+ /* n = -n would overflow because -n would evaluate to a
+ wgint value larger than WGINT_MAX. Need to make n
+ smaller and handle the last digit separately. */
+ int last_digit = n % 10;
+ /* The sign of n%10 is implementation-defined. */
+ if (last_digit < 0)
+ last_digit_char = '0' - last_digit;
+ else
+ last_digit_char = '0' + last_digit;
+ /* After n is made smaller, -n will not overflow. */
+ n /= 10;
}
*p++ = '-';
else DIGITS_19 (1000000000*(W)1000000000);
#endif
+ if (last_digit_char)
+ *p++ = last_digit_char;
+
*p = '\0';
#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
#endif /* not WINDOWS */
-/* Encode the string STR of length LENGTH to base64 format and place it
- to B64STORE. The output will be \0-terminated, and must point to a
- writable buffer of at least 1+BASE64_LENGTH(length) bytes. It
- returns the length of the resulting base64 data, not counting the
- terminating zero.
+/* Encode the octets in DATA of length LENGTH to base64 format,
+ storing the result to DEST. The output will be zero-terminated;
+ DEST must point to a writable buffer of at least
+ 1+BASE64_LENGTH(length) bytes. The function returns the length of
+ the resulting base64 data, not counting the terminating zero.
- This implementation will not emit newlines after 76 characters of
+ This implementation does not emit newlines after 76 characters of
base64 data. */
int
-base64_encode (const char *str, int length, char *b64store)
+base64_encode (const void *data, int length, char *dest)
{
/* Conversion table. */
- static char tbl[64] = {
- 'A','B','C','D','E','F','G','H',
- 'I','J','K','L','M','N','O','P',
- 'Q','R','S','T','U','V','W','X',
- 'Y','Z','a','b','c','d','e','f',
- 'g','h','i','j','k','l','m','n',
- 'o','p','q','r','s','t','u','v',
- 'w','x','y','z','0','1','2','3',
- '4','5','6','7','8','9','+','/'
+ static const char tbl[64] = {
+ 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
+ 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
+ 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
+ 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
};
- int i;
- const unsigned char *s = (const unsigned char *) str;
- char *p = b64store;
+ /* Access bytes in DATA as unsigned char, otherwise the shifts below
+ don't work for data with MSB set. */
+ const unsigned char *s = data;
+ /* Theoretical ANSI violation when length < 3. */
+ const unsigned char *end = (const unsigned char *) data + length - 2;
+ char *p = dest;
/* Transform the 3x8 bits to 4x6 bits, as required by base64. */
- for (i = 0; i < length; i += 3)
+ for (; s < end; s += 3)
{
*p++ = tbl[s[0] >> 2];
*p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
*p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
*p++ = tbl[s[2] & 0x3f];
- s += 3;
}
/* Pad the result if necessary... */
- if (i == length + 1)
- *(p - 1) = '=';
- else if (i == length + 2)
- *(p - 1) = *(p - 2) = '=';
-
+ switch (length % 3)
+ {
+ case 1:
+ *p++ = tbl[s[0] >> 2];
+ *p++ = tbl[(s[0] & 3) << 4];
+ *p++ = '=';
+ *p++ = '=';
+ break;
+ case 2:
+ *p++ = tbl[s[0] >> 2];
+ *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
+ *p++ = tbl[((s[1] & 0xf) << 2)];
+ *p++ = '=';
+ break;
+ }
/* ...and zero-terminate it. */
*p = '\0';
- return p - b64store;
+ return p - dest;
}
/* Store in C the next non-whitespace character from the string, or \0
#define IS_ASCII(c) (((c) & 0x80) == 0)
-/* Decode data from BASE64 (pointer to \0-terminated text) into memory
- pointed to by TO. TO should be large enough to accomodate the
- decoded data, which is guaranteed to be less than strlen(base64).
+/* Decode data from BASE64 (a null-terminated string) into memory
+ pointed to by DEST. DEST is assumed to be large enough to
+ accommodate the decoded data, which is guaranteed to be no more than
+ 3/4*strlen(base64).
- Since TO is assumed to contain binary data, it is not
+ Since DEST is assumed to contain binary data, it is not
NUL-terminated. The function returns the length of the data
written to TO. -1 is returned in case of error caused by malformed
- base64 input. */
+ base64 input.
+
+ This function originates from Free Recode. */
int
-base64_decode (const char *base64, char *to)
+base64_decode (const char *base64, void *dest)
{
/* Table of base64 values for first 128 characters. Note that this
assumes ASCII (but so does Wget in other places). */
- static signed char base64_char_to_value[128] =
+ static const signed char base64_char_to_value[128] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
const char *p = base64;
- char *q = to;
+ char *q = dest;
while (1)
{
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE
- return q - to;
+ return q - (char *) dest;
}
#undef IS_ASCII
return buf;
}
+
+#ifdef TESTING
+
+const char *
+test_subdir_p()
+{
+ int i;
+ struct {
+ char *d1;
+ char *d2;
+ bool result;
+ } test_array[] = {
+ { "/somedir", "/somedir", true },
+ { "/somedir", "/somedir/d2", true },
+ { "/somedir/d1", "/somedir", false },
+ };
+
+ for (i = 0; i < countof(test_array); ++i)
+ {
+ bool res = subdir_p (test_array[i].d1, test_array[i].d2);
+
+ mu_assert ("test_subdir_p: wrong result",
+ res == test_array[i].result);
+ }
+
+ return NULL;
+}
+
+const char *
+test_dir_matches_p()
+{
+ int i;
+ struct {
+ char *dirlist[3];
+ char *dir;
+ bool result;
+ } test_array[] = {
+ { { "/somedir", "/someotherdir", NULL }, "somedir", true },
+ { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
+ { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
+ { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
+ { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
+ };
+
+ for (i = 0; i < countof(test_array); ++i)
+ {
+ bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
+
+ mu_assert ("test_dir_matches_p: wrong result",
+ res == test_array[i].result);
+ }
+
+ return NULL;
+}
+
+#endif /* TESTING */
+