X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=90b50043c59772253924e49fce99b115f8dbe092;hb=4d7c5e087b2bc82c9f503dff003916d1047903ce;hp=2a88158db50ee4c4229af58f6a00a8c9c5884910;hpb=4a0417f00bae994ddcc9946927f199b66a8a533b;p=wget diff --git a/src/utils.c b/src/utils.c index 2a88158d..90b50043 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,11 +1,11 @@ /* Various utility functions. - Copyright (C) 1996-2005 Free Software Foundation, Inc. + Copyright (C) 1996-2006 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +along with Wget. If not, see <http://www.gnu.org/licenses/>. In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -81,6 +80,10 @@ so, delete this exception statement from your version. */ #include "utils.h" #include "hash.h" +#ifdef TESTING +#include "test.h" +#endif + /* Utility function: like xstrdup(), but also lowercases S. */ char * @@ -155,7 +158,7 @@ sepstring (const char *s) char * aprintf (const char *fmt, ...) { -#ifdef HAVE_VASPRINTF +#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC /* Use vasprintf. */ int ret; va_list args; @@ -249,51 +252,38 @@ concat_strings (const char *str0, ...) return ret; } +/* Format the provided time according to the specified format. 
The + format is a string with format elements supported by strftime. */ + +static char * +fmttime (time_t t, const char *fmt) +{ + static char output[32]; + struct tm *tm = localtime(&t); + if (!tm) + abort (); + if (!strftime(output, sizeof(output), fmt, tm)) + abort (); + return output; +} + /* Return pointer to a static char[] buffer in which zero-terminated string-representation of TM (in form hh:mm:ss) is printed. If TM is NULL, the current time will be used. */ char * -time_str (time_t *tm) +time_str (time_t t) { - static char output[15]; - struct tm *ptm; - time_t secs = tm ? *tm : time (NULL); - - if (secs == -1) - { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; - } - ptm = localtime (&secs); - sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; + return fmttime(t, "%H:%M:%S"); } /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ char * -datetime_str (time_t *tm) +datetime_str (time_t t) { - static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */ - struct tm *ptm; - time_t secs = tm ? *tm : time (NULL); - - if (secs == -1) - { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; - } - ptm = localtime (&secs); - sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d", - ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday, - ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; + return fmttime(t, "%Y-%m-%d %H:%M:%S"); } /* The Windows versions of the following two functions are defined in @@ -677,46 +667,49 @@ acceptable (const char *s) return true; } -/* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is - `/something', frontcmp() will return true only if S2 begins with - `/something'. */ +/* Check if D2 is a subdirectory of D1. E.g. 
if D1 is `/something', subdir_p() + will return true if and only if D2 begins with `/something/' or is exactly + `/something'. */ bool -frontcmp (const char *s1, const char *s2) +subdir_p (const char *d1, const char *d2) { if (!opt.ignore_case) - for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2) + for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2) ; else - for (; *s1 && *s2 && (TOLOWER (*s1) == TOLOWER (*s2)); ++s1, ++s2) + for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2) ; - return *s1 == '\0'; + + return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); } -/* Iterate through STRLIST, and return the first element that matches - S, through wildcards or front comparison (as appropriate). */ -static char * -proclist (char **strlist, const char *s) +/* Iterate through DIRLIST (which must be NULL-terminated), and return + true if any element matches DIR, through wildcards or subdirectory + comparison (as appropriate). */ +static bool +dir_matches_p (char **dirlist, const char *dir) { char **x; int (*matcher) (const char *, const char *, int) = opt.ignore_case ? fnmatch_nocase : fnmatch; - for (x = strlist; *x; x++) + for (x = dirlist; *x; x++) { /* Remove leading '/' */ char *p = *x + (**x == '/'); if (has_wildcards_p (p)) { - if (matcher (p, s, FNM_PATHNAME) == 0) + if (matcher (p, dir, FNM_PATHNAME) == 0) break; } else { - if (frontcmp (p, s)) + if (subdir_p (p, dir)) break; } } - return *x; + + return *x ? 
true : false; } /* Returns whether DIRECTORY is acceptable for download, wrt the @@ -733,12 +726,12 @@ accdir (const char *directory) ++directory; if (opt.includes) { - if (!proclist (opt.includes, directory)) + if (!dir_matches_p (opt.includes, directory)) return false; } if (opt.excludes) { - if (proclist (opt.excludes, directory)) + if (dir_matches_p (opt.excludes, directory)) return false; } return true; @@ -1165,56 +1158,45 @@ string_set_contains (struct hash_table *ht, const char *s) return hash_table_contains (ht, s); } -static int -string_set_to_array_mapper (void *key, void *value_ignored, void *arg) -{ - char ***arrayptr = (char ***) arg; - *(*arrayptr)++ = (char *) key; - return 0; -} - /* Convert the specified string set to array. ARRAY should be large enough to hold hash_table_count(ht) char pointers. */ void string_set_to_array (struct hash_table *ht, char **array) { - hash_table_map (ht, string_set_to_array_mapper, &array); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + *array++ = iter.key; } -static int -string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored) -{ - xfree (key); - return 0; -} +/* Free the string set. This frees both the storage allocated for + keys and the actual hash table. (hash_table_destroy would only + destroy the hash table.) */ void string_set_free (struct hash_table *ht) { - hash_table_map (ht, string_set_free_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + xfree (iter.key); hash_table_destroy (ht); } -static int -free_keys_and_values_mapper (void *key, void *value, void *arg_ignored) -{ - xfree (key); - xfree (value); - return 0; -} - -/* Another utility function: call free() on all keys and values of HT. */ +/* Utility function: simply call xfree() on all keys and values of HT. 
*/ void free_keys_and_values (struct hash_table *ht) { - hash_table_map (ht, free_keys_and_values_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + { + xfree (iter.key); + xfree (iter.value); + } } - -/* Get grouping data, the separator and grouping info, by calling - localeconv(). The information is cached after the first call to - the function. +/* Get digit grouping data for thousand separators by calling + localeconv(). The data includes separator string and grouping info + and is cached after the first call to the function. In locales that don't set a thousand separator (such as the "C" locale), this forces it to be ",". We are now only showing @@ -1258,8 +1240,8 @@ get_grouping_data (const char **sep, const char **grouping) Unfortunately, we cannot use %'d (in fact it would be %'j) to get the separators because it's too non-portable, and it's hard to test - for this feature at configure time. Besides, it wouldn't work in - the "C" locale, which many Unix users still work in. */ + for this feature at configure time. Besides, it wouldn't display + separators in the "C" locale, still used by many Unix users. */ const char * with_thousand_seps (wgint n) @@ -1283,8 +1265,8 @@ with_thousand_seps (wgint n) atgroup = grouping; groupsize = *atgroup++; - /* This will overflow on WGINT_MIN, but we're not using this to - print negative numbers anyway. */ + /* This would overflow on WGINT_MIN, but printing negative numbers + is not an important goal of this function. */ if (negative) n = -n; @@ -1326,11 +1308,10 @@ with_thousand_seps (wgint n) usually improves readability." This intentionally uses kilobyte (KB), megabyte (MB), etc. in their - original computer-related meaning of "powers of 1024". Powers of - 1000 would be useless since Wget already displays sizes with - thousand separators. We don't use the "*bibyte" names invented in - 1998, and seldom used in practice. 
Wikipedia's entry on kilobyte - discusses this in some detail. */ + original computer-related meaning of "powers of 1024". We don't + use the "*bibyte" names invented in 1998, and seldom used in + practice. Wikipedia's entry on "binary prefix" discusses this in + some detail. */ char * human_readable (HR_NUMTYPE n) @@ -1421,20 +1402,6 @@ numdigit (wgint number) #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10) #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10) -/* SPRINTF_WGINT is used by number_to_string to handle pathological - cases and to portably support strange sizes of wgint. Ideally this - would just use "%j" and intmax_t, but many systems don't support - it, so it's used only if nothing else works. */ -#if SIZEOF_LONG >= SIZEOF_WGINT -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n)) -#elif SIZEOF_LONG_LONG >= SIZEOF_WGINT -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n)) -#elif defined(WINDOWS) -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n)) -#else -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n)) -#endif - /* Shorthand for casting to wgint. */ #define W wgint @@ -1444,15 +1411,15 @@ numdigit (wgint number) The speedup may make a difference in programs that frequently convert numbers to strings. Some implementations of sprintf, - particularly the one in GNU libc, have been known to be extremely - slow when converting integers to strings. + particularly the one in some versions of GNU libc, have been known + to be quite slow when converting integers to strings. Return the pointer to the location where the terminating zero was printed. (Equivalent to calling buffer+strlen(buffer) after the function is done.) - BUFFER should be big enough to accept as many bytes as you expect - the number to take up. On machines with 64-bit longs the maximum + BUFFER should be large enough to accept as many bytes as you expect + the number to take up. 
On machines with 64-bit wgints the maximum needed size is 24 bytes. That includes the digits needed for the largest 64-bit number, the `-' sign in case it's negative, and the terminating '\0'. */ @@ -1463,21 +1430,29 @@ number_to_string (char *buffer, wgint number) char *p = buffer; wgint n = number; + int last_digit_char = 0; + #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8) - /* We are running in a strange or misconfigured environment. Let - sprintf cope with it. */ - SPRINTF_WGINT (buffer, n); - p += strlen (buffer); + /* We are running in a very strange environment. Leave the correct + printing to sprintf. */ + p += sprintf (buf, "%j", (intmax_t) (n)); #else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ if (n < 0) { if (n < -WGINT_MAX) { - /* -n would overflow. Have sprintf deal with this. */ - SPRINTF_WGINT (buffer, n); - p += strlen (buffer); - return p; + /* n = -n would overflow because -n would evaluate to a + wgint value larger than WGINT_MAX. Need to make n + smaller and handle the last digit separately. */ + int last_digit = n % 10; + /* The sign of n%10 is implementation-defined. */ + if (last_digit < 0) + last_digit_char = '0' - last_digit; + else + last_digit_char = '0' + last_digit; + /* After n is made smaller, -n will not overflow. */ + n /= 10; } *p++ = '-'; @@ -1517,6 +1492,9 @@ number_to_string (char *buffer, wgint number) else DIGITS_19 (1000000000*(W)1000000000); #endif + if (last_digit_char) + *p++ = last_digit_char; + *p = '\0'; #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ @@ -1897,53 +1875,61 @@ xsleep (double seconds) #endif /* not WINDOWS */ -/* Encode the string STR of length LENGTH to base64 format and place it - to B64STORE. The output will be \0-terminated, and must point to a - writable buffer of at least 1+BASE64_LENGTH(length) bytes. It - returns the length of the resulting base64 data, not counting the - terminating zero. +/* Encode the octets in DATA of length LENGTH to base64 format, + storing the result to DEST. 
The output will be zero-terminated, + and must point to a writable buffer of at least + 1+BASE64_LENGTH(length) bytes. The function returns the length of + the resulting base64 data, not counting the terminating zero. - This implementation will not emit newlines after 76 characters of + This implementation does not emit newlines after 76 characters of base64 data. */ int -base64_encode (const char *str, int length, char *b64store) +base64_encode (const void *data, int length, char *dest) { /* Conversion table. */ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' + static const char tbl[64] = { + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', + 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', + 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', + 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' }; - int i; - const unsigned char *s = (const unsigned char *) str; - char *p = b64store; + /* Access bytes in DATA as unsigned char, otherwise the shifts below + don't work for data with MSB set. */ + const unsigned char *s = data; + /* Theoretical ANSI violation when length < 3. */ + const unsigned char *end = (const unsigned char *) data + length - 2; + char *p = dest; /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) + for (; s < end; s += 3) { *p++ = tbl[s[0] >> 2]; *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; *p++ = tbl[s[2] & 0x3f]; - s += 3; } /* Pad the result if necessary... 
*/ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - + switch (length % 3) + { + case 1: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[(s[0] & 3) << 4]; + *p++ = '='; + *p++ = '='; + break; + case 2: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; + *p++ = tbl[((s[1] & 0xf) << 2)]; + *p++ = '='; + break; + } /* ...and zero-terminate it. */ *p = '\0'; - return p - b64store; + return p - dest; } /* Store in C the next non-whitespace character from the string, or \0 @@ -1954,21 +1940,24 @@ base64_encode (const char *str, int length, char *b64store) #define IS_ASCII(c) (((c) & 0x80) == 0) -/* Decode data from BASE64 (pointer to \0-terminated text) into memory - pointed to by TO. TO should be large enough to accomodate the - decoded data, which is guaranteed to be less than strlen(base64). +/* Decode data from BASE64 (a null-terminated string) into memory + pointed to by DEST. DEST is assumed to be large enough to + accommodate the decoded data, which is guaranteed to be no more than + 3/4*strlen(base64). - Since TO is assumed to contain binary data, it is not + Since DEST is assumed to contain binary data, it is not NUL-terminated. The function returns the length of the data written to TO. -1 is returned in case of error caused by malformed - base64 input. */ + base64 input. + + This function originates from Free Recode. */ int -base64_decode (const char *base64, char *to) +base64_decode (const char *base64, void *dest) { /* Table of base64 values for first 128 characters. Note that this assumes ASCII (but so does Wget in other places). 
*/ - static signed char base64_char_to_value[128] = + static const signed char base64_char_to_value[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */ @@ -1988,7 +1977,7 @@ base64_decode (const char *base64, char *to) #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=') const char *p = base64; - char *q = to; + char *q = dest; while (1) { @@ -2047,7 +2036,7 @@ base64_decode (const char *base64, char *to) #undef IS_BASE64 #undef BASE64_CHAR_TO_VALUE - return q - to; + return q - (char *) dest; } #undef IS_ASCII @@ -2133,3 +2122,60 @@ print_decimal (double number) return buf; } + +#ifdef TESTING + +const char * +test_subdir_p() +{ + int i; + struct { + char *d1; + char *d2; + bool result; + } test_array[] = { + { "/somedir", "/somedir", true }, + { "/somedir", "/somedir/d2", true }, + { "/somedir/d1", "/somedir", false }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res = subdir_p (test_array[i].d1, test_array[i].d2); + + mu_assert ("test_subdir_p: wrong result", + res == test_array[i].result); + } + + return NULL; +} + +const char * +test_dir_matches_p() +{ + int i; + struct { + char *dirlist[3]; + char *dir; + bool result; + } test_array[] = { + { { "/somedir", "/someotherdir", NULL }, "somedir", true }, + { { "/somedir", "/someotherdir", NULL }, "anotherdir", false }, + { { "/somedir", "/*otherdir", NULL }, "anotherdir", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "d1", false }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); + + mu_assert ("test_dir_matches_p: wrong result", + res == test_array[i].result); + } + + return NULL; +} + +#endif /* TESTING */ +