X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=7a90c0692dfe6a569e1e53bafd6e92de23d91d36;hb=8566a727674ab3c2b0df03c31c6085a0d5d5bf81;hp=96d4767c80f63dc4d52affb018bf31e864d8bdc5;hpb=c9049f94d77b1fe100b53848b545e5b1e61d7df3;p=wget diff --git a/src/utils.c b/src/utils.c index 96d4767c..7a90c069 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,5 +1,5 @@ /* Various utility functions. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 1996-2006 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -14,8 +14,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -42,9 +42,6 @@ so, delete this exception statement from your version. */ #ifdef HAVE_MMAP # include #endif -#ifdef HAVE_PWD_H -# include -#endif #ifdef HAVE_UTIME_H # include #endif @@ -52,15 +49,10 @@ so, delete this exception statement from your version. */ # include #endif #include -#ifdef NeXT -# include /* for access() */ -#endif #include #include #include -#ifdef HAVE_LOCALE_H -# include -#endif +#include /* For TIOCGWINSZ and friends: */ #ifdef HAVE_SYS_IOCTL_H @@ -70,11 +62,9 @@ so, delete this exception statement from your version. */ # include #endif -/* Needed for run_with_timeout. */ +/* Needed for Unix version of run_with_timeout. */ #include -#ifdef HAVE_SETJMP_H -# include -#endif +#include #ifndef HAVE_SIGSETJMP /* If sigsetjmp is a macro, configure won't pick it up. */ @@ -83,8 +73,7 @@ so, delete this exception statement from your version. */ # endif #endif -#undef USE_SIGNAL_TIMEOUT -#if defined(HAVE_SIGSETJMP) || defined(HAVE_SIGBLOCK) +#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK # define USE_SIGNAL_TIMEOUT #endif @@ -92,6 +81,10 @@ so, delete this exception statement from your version. */ #include "utils.h" #include "hash.h" +#ifdef TESTING +#include "test.h" +#endif + /* Utility function: like xstrdup(), but also lowercases S. */ char * @@ -152,28 +145,48 @@ sepstring (const char *s) return res; } -/* Like sprintf, but allocates a string of sufficient size with malloc - and returns it. GNU libc has a similar function named asprintf, - which requires the pointer to the string to be passed. */ +/* Like sprintf, but prints into a string of sufficient size freshly + allocated with malloc, which is returned. If unable to print due + to invalid format, returns NULL. Inability to allocate needed + memory results in abort, as with xmalloc. This is in spirit + similar to the GNU/BSD extension asprintf, but somewhat easier to + use. + + Internally the function either calls vasprintf or loops around + vsnprintf until the correct size is found. Since Wget also ships a + fallback implementation of vsnprintf, this should be portable. */ char * aprintf (const char *fmt, ...) { - /* This function is implemented using vsnprintf, which we provide - for the systems that don't have it. Therefore, it should be 100% - portable. */ +#ifdef HAVE_VASPRINTF + /* Use vasprintf. */ + int ret; + va_list args; + char *str; + va_start (args, fmt); + ret = vasprintf (&str, fmt, args); + va_end (args); + if (ret < 0 && errno == ENOMEM) + abort (); /* for consistency with xmalloc/xrealloc */ + else if (ret < 0) + return NULL; + return str; +#else /* not HAVE_VASPRINTF */ + /* vasprintf is unavailable. snprintf into a small buffer and + resize it as necessary. */ int size = 32; char *str = xmalloc (size); + /* #### This code will infloop and eventually abort in xrealloc if + passed a FMT that causes snprintf to consistently return -1. */ + while (1) { int n; va_list args; - /* See log_vprintf_internal for explanation why it's OK to rely - on the return value of vsnprintf. */ - va_start (args, fmt); n = vsnprintf (str, size, fmt, args); va_end (args); @@ -189,6 +202,7 @@ aprintf (const char *fmt, ...) size <<= 1; /* twice the old size */ str = xrealloc (str, size); } +#endif /* not HAVE_VASPRINTF */ } /* Concatenate the NULL-terminated list of string arguments into @@ -239,51 +253,38 @@ concat_strings (const char *str0, ...) return ret; } +/* Format the provided time according to the specified format. The + format is a string with format elements supported by strftime. */ + +static char * +fmttime (time_t t, const char *fmt) +{ + static char output[32]; + struct tm *tm = localtime(&t); + if (!tm) + abort (); + if (!strftime(output, sizeof(output), fmt, tm)) + abort (); + return output; +} + /* Return pointer to a static char[] buffer in which zero-terminated string-representation of TM (in form hh:mm:ss) is printed. If TM is NULL, the current time will be used. */ char * -time_str (time_t *tm) +time_str (time_t t) { - static char output[15]; - struct tm *ptm; - time_t secs = tm ? *tm : time (NULL); - - if (secs == -1) - { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; - } - ptm = localtime (&secs); - sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; + return fmttime(t, "%H:%M:%S"); } /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ char * -datetime_str (time_t *tm) +datetime_str (time_t t) { - static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */ - struct tm *ptm; - time_t secs = tm ? *tm : time (NULL); - - if (secs == -1) - { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; - } - ptm = localtime (&secs); - sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d", - ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday, - ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; + return fmttime(t, "%Y-%m-%d %H:%M:%S"); } /* The Windows versions of the following two functions are defined in @@ -598,7 +599,7 @@ make_directory (const char *directory) file_merge("/foo/bar/", "baz") => "/foo/bar/baz" file_merge("foo", "bar") => "bar" - In other words, it's a simpler and gentler version of uri_merge_1. */ + In other words, it's a simpler and gentler version of uri_merge. */ char * file_merge (const char *base, const char *file) @@ -617,6 +618,30 @@ file_merge (const char *base, const char *file) return result; } +/* Like fnmatch, but performs a case-insensitive match. */ + +int +fnmatch_nocase (const char *pattern, const char *string, int flags) +{ +#ifdef FNM_CASEFOLD + /* The FNM_CASEFOLD flag started as a GNU extension, but it is now + also present on *BSD platforms, and possibly elsewhere. */ + return fnmatch (pattern, string, flags | FNM_CASEFOLD); +#else + /* Turn PATTERN and STRING to lower case and call fnmatch on them. */ + char *patcopy = (char *) alloca (strlen (pattern) + 1); + char *strcopy = (char *) alloca (strlen (string) + 1); + char *p; + for (p = patcopy; *pattern; pattern++, p++) + *p = TOLOWER (*pattern); + *p = '\0'; + for (p = strcopy; *string; string++, p++) + *p = TOLOWER (*string); + *p = '\0'; + return fnmatch (patcopy, strcopy, flags); +#endif +} + static bool in_acclist (const char *const *, const char *, bool); /* Determine whether a file is acceptable to be followed, according to @@ -643,59 +668,71 @@ acceptable (const char *s) return true; } -/* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is - `/something', frontcmp() will return 1 only if S2 begins with - `/something'. Otherwise, 0 is returned. */ +/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() + will return true if and only if D2 begins with `/something/' or is exactly + '/something'. */ bool -frontcmp (const char *s1, const char *s2) +subdir_p (const char *d1, const char *d2) { - for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2); - return *s1 == '\0'; + if (!opt.ignore_case) + for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2) + ; + else + for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2) + ; + + return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); } -/* Iterate through STRLIST, and return the first element that matches - S, through wildcards or front comparison (as appropriate). */ -static char * -proclist (char **strlist, const char *s, enum accd flags) +/* Iterate through DIRLIST (which must be NULL-terminated), and return the + first element that matches DIR, through wildcards or front comparison (as + appropriate). */ +static bool +dir_matches_p (char **dirlist, const char *dir) { char **x; - for (x = strlist; *x; x++) + int (*matcher) (const char *, const char *, int) + = opt.ignore_case ? fnmatch_nocase : fnmatch; + + for (x = dirlist; *x; x++) { - /* Remove leading '/' if ALLABS */ - char *p = *x + ((flags & ALLABS) && (**x == '/')); + /* Remove leading '/' */ + char *p = *x + (**x == '/'); if (has_wildcards_p (p)) { - if (fnmatch (p, s, FNM_PATHNAME) == 0) + if (matcher (p, dir, FNM_PATHNAME) == 0) break; } else { - if (frontcmp (p, s)) + if (subdir_p (p, dir)) break; } } - return *x; + + return *x ? true : false; } /* Returns whether DIRECTORY is acceptable for download, wrt the include/exclude lists. - If FLAGS is ALLABS, the leading `/' is ignored in paths; relative - and absolute paths may be freely intermixed. */ + The leading `/' is ignored in paths; relative and absolute paths + may be freely intermixed. */ + bool -accdir (const char *directory, enum accd flags) +accdir (const char *directory) { /* Remove starting '/'. */ - if (flags & ALLABS && *directory == '/') + if (*directory == '/') ++directory; if (opt.includes) { - if (!proclist (opt.includes, directory, flags)) + if (!dir_matches_p (opt.includes, directory)) return false; } if (opt.excludes) { - if (proclist (opt.excludes, directory, flags)) + if (dir_matches_p (opt.excludes, directory)) return false; } return true; @@ -750,21 +787,24 @@ in_acclist (const char *const *accepts, const char *s, bool backward) { if (has_wildcards_p (*accepts)) { - /* fnmatch returns 0 if the pattern *does* match the - string. */ - if (fnmatch (*accepts, s, 0) == 0) + int res = opt.ignore_case + ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0); + /* fnmatch returns 0 if the pattern *does* match the string. */ + if (res == 0) return true; } else { if (backward) { - if (match_tail (s, *accepts, 0)) + if (match_tail (s, *accepts, opt.ignore_case)) return true; } else { - if (!strcmp (s, *accepts)) + int cmp = opt.ignore_case + ? strcasecmp (s, *accepts) : strcmp (s, *accepts); + if (cmp == 0) return true; } } @@ -1054,9 +1094,11 @@ merge_vecs (char **v1, char **v2) return v1; } /* Count v1. */ - for (i = 0; v1[i]; i++); + for (i = 0; v1[i]; i++) + ; /* Count v2. */ - for (j = 0; v2[j]; j++); + for (j = 0; v2[j]; j++) + ; /* Reallocate v1. */ v1 = xrealloc (v1, (i + j + 1) * sizeof (char **)); memcpy (v1 + i, v2, (j + 1) * sizeof (char *)); @@ -1117,53 +1159,51 @@ string_set_contains (struct hash_table *ht, const char *s) return hash_table_contains (ht, s); } -static int -string_set_to_array_mapper (void *key, void *value_ignored, void *arg) -{ - char ***arrayptr = (char ***) arg; - *(*arrayptr)++ = (char *) key; - return 0; -} - /* Convert the specified string set to array. ARRAY should be large enough to hold hash_table_count(ht) char pointers. */ void string_set_to_array (struct hash_table *ht, char **array) { - hash_table_map (ht, string_set_to_array_mapper, &array); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + *array++ = iter.key; } -static int -string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored) -{ - xfree (key); - return 0; -} +/* Free the string set. This frees both the storage allocated for + keys and the actual hash table. (hash_table_destroy would only + destroy the hash table.) */ void string_set_free (struct hash_table *ht) { - hash_table_map (ht, string_set_free_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + xfree (iter.key); hash_table_destroy (ht); } -static int -free_keys_and_values_mapper (void *key, void *value, void *arg_ignored) -{ - xfree (key); - xfree (value); - return 0; -} - -/* Another utility function: call free() on all keys and values of HT. */ +/* Utility function: simply call xfree() on all keys and values of HT. */ void free_keys_and_values (struct hash_table *ht) { - hash_table_map (ht, free_keys_and_values_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + { + xfree (iter.key); + xfree (iter.value); + } } - +/* Get digit grouping data for thousand separors by calling + localeconv(). The data includes separator string and grouping info + and is cached after the first call to the function. + + In locales that don't set a thousand separator (such as the "C" + locale), this forces it to be ",". We are now only showing + thousand separators in one place, so this shouldn't be a problem in + practice. */ + static void get_grouping_data (const char **sep, const char **grouping) { @@ -1172,20 +1212,23 @@ get_grouping_data (const char **sep, const char **grouping) static bool initialized; if (!initialized) { -#ifdef LC_NUMERIC /* Get the grouping info from the locale. */ - struct lconv *lconv; - const char *oldlocale = setlocale (LC_NUMERIC, ""); - lconv = localeconv (); - cached_sep = xstrdup (lconv->thousands_sep); - cached_grouping = xstrdup (lconv->grouping); - /* Restore the locale to previous settings. */ - setlocale (LC_NUMERIC, oldlocale); - if (!cached_sep) -#endif - /* Force separator for locales that specify no separators - ("C", "hr", and probably many more.) */ - cached_sep = ",", cached_grouping = "\x03"; + struct lconv *lconv = localeconv (); + cached_sep = lconv->thousands_sep; + cached_grouping = lconv->grouping; + if (!*cached_sep) + { + /* Many locales (such as "C" or "hr_HR") don't specify + grouping, which we still want to use it for legibility. + In those locales set the sep char to ',', unless that + character is used for decimal point, in which case set it + to ".". */ + if (*lconv->decimal_point != ',') + cached_sep = ","; + else + cached_sep = "."; + cached_grouping = "\x03"; + } initialized = true; } *sep = cached_sep; @@ -1198,8 +1241,8 @@ get_grouping_data (const char **sep, const char **grouping) Unfortunately, we cannot use %'d (in fact it would be %'j) to get the separators because it's too non-portable, and it's hard to test - for this feature at configure time. Besides, it wouldn't work in - the "C" locale, which many Unix users still work in. */ + for this feature at configure time. Besides, it wouldn't display + separators in the "C" locale, still used by many Unix users. */ const char * with_thousand_seps (wgint n) @@ -1223,8 +1266,8 @@ with_thousand_seps (wgint n) atgroup = grouping; groupsize = *atgroup++; - /* This will overflow on WGINT_MIN, but we're not using this to - print negative numbers anyway. */ + /* This would overflow on WGINT_MIN, but printing negative numbers + is not an important goal of this fuinction. */ if (negative) n = -n; @@ -1266,11 +1309,10 @@ with_thousand_seps (wgint n) usually improves readability." This intentionally uses kilobyte (KB), megabyte (MB), etc. in their - original computer-related meaning of "powers of 1024". Powers of - 1000 would be useless since Wget already displays sizes with - thousand separators. We don't use the "*bibyte" names invented in - 1998, and seldom used in practice. Wikipedia's entry on kilobyte - discusses this in some detail. */ + original computer-related meaning of "powers of 1024". We don't + use the "*bibyte" names invented in 1998, and seldom used in + practice. Wikipedia's entry on "binary prefix" discusses this in + some detail. */ char * human_readable (HR_NUMTYPE n) @@ -1305,10 +1347,7 @@ human_readable (HR_NUMTYPE n) *this* power. */ if ((n / 1024) < 1024 || i == countof (powers) - 1) { - /* Must cast to long first because MS VC can't directly cast - __int64 to double. (This is safe because N is known to - be < 1024^2, so always fits into long.) */ - double val = (double) (long) n / 1024.0; + double val = n / 1024.0; /* Print values smaller than 10 with one decimal digits, and others without any decimals. */ snprintf (buf, sizeof (buf), "%.*f%c", @@ -1364,20 +1403,6 @@ numdigit (wgint number) #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10) #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10) -/* SPRINTF_WGINT is used by number_to_string to handle pathological - cases and to portably support strange sizes of wgint. Ideally this - would just use "%j" and intmax_t, but many systems don't support - it, so it's used only if nothing else works. */ -#if SIZEOF_LONG >= SIZEOF_WGINT -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n)) -#elif SIZEOF_LONG_LONG >= SIZEOF_WGINT -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n)) -#elif defined(WINDOWS) -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n)) -#else -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n)) -#endif - /* Shorthand for casting to wgint. */ #define W wgint @@ -1387,15 +1412,15 @@ numdigit (wgint number) The speedup may make a difference in programs that frequently convert numbers to strings. Some implementations of sprintf, - particularly the one in GNU libc, have been known to be extremely - slow when converting integers to strings. + particularly the one in some versions of GNU libc, have been known + to be quite slow when converting integers to strings. Return the pointer to the location where the terminating zero was printed. (Equivalent to calling buffer+strlen(buffer) after the function is done.) - BUFFER should be big enough to accept as many bytes as you expect - the number to take up. On machines with 64-bit longs the maximum + BUFFER should be large enough to accept as many bytes as you expect + the number to take up. On machines with 64-bit wgints the maximum needed size is 24 bytes. That includes the digits needed for the largest 64-bit number, the `-' sign in case it's negative, and the terminating '\0'. */ @@ -1406,21 +1431,29 @@ number_to_string (char *buffer, wgint number) char *p = buffer; wgint n = number; + int last_digit_char = 0; + #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8) - /* We are running in a strange or misconfigured environment. Let - sprintf cope with it. */ - SPRINTF_WGINT (buffer, n); - p += strlen (buffer); + /* We are running in a very strange environment. Leave the correct + printing to sprintf. */ + p += sprintf (buf, "%j", (intmax_t) (n)); #else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ if (n < 0) { if (n < -WGINT_MAX) { - /* -n would overflow. Have sprintf deal with this. */ - SPRINTF_WGINT (buffer, n); - p += strlen (buffer); - return p; + /* n = -n would overflow because -n would evaluate to a + wgint value larger than WGINT_MAX. Need to make n + smaller and handle the last digit separately. */ + int last_digit = n % 10; + /* The sign of n%10 is implementation-defined. */ + if (last_digit < 0) + last_digit_char = '0' - last_digit; + else + last_digit_char = '0' + last_digit; + /* After n is made smaller, -n will not overflow. */ + n /= 10; } *p++ = '-'; @@ -1460,6 +1493,9 @@ number_to_string (char *buffer, wgint number) else DIGITS_19 (1000000000*(W)1000000000); #endif + if (last_digit_char) + *p++ = last_digit_char; + *p = '\0'; #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ @@ -1468,6 +1504,7 @@ number_to_string (char *buffer, wgint number) #undef PR #undef W +#undef SPRINTF_WGINT #undef DIGITS_1 #undef DIGITS_2 #undef DIGITS_3 @@ -1561,68 +1598,74 @@ determine_screen_width (void) return 0; #endif /* neither TIOCGWINSZ nor WINDOWS */ } + +/* Whether the rnd system (either rand or [dl]rand48) has been + seeded. */ +static int rnd_seeded; /* Return a random number between 0 and MAX-1, inclusive. - If MAX is greater than the value of RAND_MAX+1 on the system, the - returned value will be in the range [0, RAND_MAX]. This may be - fixed in a future release. - + If the system does not support lrand48 and MAX is greater than the + value of RAND_MAX+1 on the system, the returned value will be in + the range [0, RAND_MAX]. This may be fixed in a future release. The random number generator is seeded automatically the first time it is called. - This uses rand() for portability. It has been suggested that - random() offers better randomness, but this is not required for - Wget, so I chose to go for simplicity and use rand - unconditionally. - - DO NOT use this for cryptographic purposes. It is only meant to be - used in situations where quality of the random numbers returned - doesn't really matter. */ + This uses lrand48 where available, rand elsewhere. DO NOT use it + for cryptography. It is only meant to be used in situations where + quality of the random numbers returned doesn't really matter. */ int random_number (int max) { - static int seeded; +#ifdef HAVE_DRAND48 + if (!rnd_seeded) + { + srand48 ((long) time (NULL) ^ (long) getpid ()); + rnd_seeded = 1; + } + return lrand48 () % max; +#else /* not HAVE_DRAND48 */ + double bounded; int rnd; - - if (!seeded) + if (!rnd_seeded) { - srand (time (NULL)); - seeded = 1; + srand ((unsigned) time (NULL) ^ (unsigned) getpid ()); + rnd_seeded = 1; } rnd = rand (); - /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1, - and enforce that assumption by masking other bits. */ -#ifndef RAND_MAX -# define RAND_MAX 32767 - rnd &= RAND_MAX; -#endif + /* Like rand() % max, but uses the high-order bits for better + randomness on architectures where rand() is implemented using a + simple congruential generator. */ - /* This is equivalent to rand() % max, but uses the high-order bits - for better randomness on architecture where rand() is implemented - using a simple congruential generator. */ + bounded = (double) max * rnd / (RAND_MAX + 1.0); + return (int) bounded; - bounded = (double)max * rnd / (RAND_MAX + 1.0); - return (int)bounded; +#endif /* not HAVE_DRAND48 */ } /* Return a random uniformly distributed floating point number in the - [0, 1) range. The precision of returned numbers is 9 digits. - - Modify this to use erand48() where available! */ + [0, 1) range. Uses drand48 where available, and a really lame + kludge elsewhere. */ double random_float (void) { - /* We can't rely on any specific value of RAND_MAX, but I'm pretty - sure it's greater than 1000. */ - int rnd1 = random_number (1000); - int rnd2 = random_number (1000); - int rnd3 = random_number (1000); - return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0; +#ifdef HAVE_DRAND48 + if (!rnd_seeded) + { + srand48 ((long) time (NULL) ^ (long) getpid ()); + rnd_seeded = 1; + } + return drand48 (); +#else /* not HAVE_DRAND48 */ + return ( random_number (10000) / 10000.0 + + random_number (10000) / (10000.0 * 10000.0) + + random_number (10000) / (10000.0 * 10000.0 * 10000.0) + + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0)); +#endif /* not HAVE_DRAND48 */ } /* Implementation of run_with_timeout, a generic timeout-forcing @@ -1833,80 +1876,89 @@ xsleep (double seconds) #endif /* not WINDOWS */ -/* Encode the string STR of length LENGTH to base64 format and place it - to B64STORE. The output will be \0-terminated, and must point to a - writable buffer of at least 1+BASE64_LENGTH(length) bytes. It - returns the length of the resulting base64 data, not counting the - terminating zero. +/* Encode the octets in DATA of length LENGTH to base64 format, + storing the result to DEST. The output will be zero-terminated, + and must point to a writable buffer of at least + 1+BASE64_LENGTH(length) bytes. The function returns the length of + the resulting base64 data, not counting the terminating zero. - This implementation will not emit newlines after 76 characters of + This implementation does not emit newlines after 76 characters of base64 data. */ int -base64_encode (const char *str, int length, char *b64store) +base64_encode (const void *data, int length, char *dest) { /* Conversion table. */ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' + static const char tbl[64] = { + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', + 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', + 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', + 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' }; - int i; - const unsigned char *s = (const unsigned char *) str; - char *p = b64store; + /* Access bytes in DATA as unsigned char, otherwise the shifts below + don't work for data with MSB set. */ + const unsigned char *s = data; + /* Theoretical ANSI violation when length < 3. */ + const unsigned char *end = (const unsigned char *) data + length - 2; + char *p = dest; /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) + for (; s < end; s += 3) { *p++ = tbl[s[0] >> 2]; *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; *p++ = tbl[s[2] & 0x3f]; - s += 3; } /* Pad the result if necessary... */ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - + switch (length % 3) + { + case 1: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[(s[0] & 3) << 4]; + *p++ = '='; + *p++ = '='; + break; + case 2: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; + *p++ = tbl[((s[1] & 0xf) << 2)]; + *p++ = '='; + break; + } /* ...and zero-terminate it. */ *p = '\0'; - return p - b64store; + return p - dest; } -#define IS_ASCII(c) (((c) & 0x80) == 0) -#define IS_BASE64(c) ((IS_ASCII (c) && base64_char_to_value[c] >= 0) || c == '=') +/* Store in C the next non-whitespace character from the string, or \0 + when end of string is reached. */ +#define NEXT_CHAR(c, p) do { \ + c = (unsigned char) *p++; \ +} while (ISSPACE (c)) -/* Get next character from the string, except that non-base64 - characters are ignored, as mandated by rfc2045. */ -#define NEXT_BASE64_CHAR(c, p) do { \ - c = *p++; \ -} while (c != '\0' && !IS_BASE64 (c)) +#define IS_ASCII(c) (((c) & 0x80) == 0) -/* Decode data from BASE64 (assumed to be encoded as base64) into - memory pointed to by TO. TO should be large enough to accomodate - the decoded data, which is guaranteed to be less than - strlen(base64). +/* Decode data from BASE64 (a null-terminated string) into memory + pointed to by DEST. DEST is assumed to be large enough to + accomodate the decoded data, which is guaranteed to be no more than + 3/4*strlen(base64). - Since TO is assumed to contain binary data, it is not + Since DEST is assumed to contain binary data, it is not NUL-terminated. The function returns the length of the data written to TO. -1 is returned in case of error caused by malformed - base64 input. */ + base64 input. + + This function originates from Free Recode. */ int -base64_decode (const char *base64, char *to) +base64_decode (const char *base64, void *dest) { /* Table of base64 values for first 128 characters. Note that this assumes ASCII (but so does Wget in other places). */ - static short base64_char_to_value[128] = + static const signed char base64_char_to_value[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */ @@ -1922,9 +1974,11 @@ base64_decode (const char *base64, char *to) 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */ 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */ }; +#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c]) +#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=') const char *p = base64; - char *q = to; + char *q = dest; while (1) { @@ -1932,30 +1986,32 @@ base64_decode (const char *base64, char *to) unsigned long value; /* Process first byte of a quadruplet. */ - NEXT_BASE64_CHAR (c, p); + NEXT_CHAR (c, p); if (!c) break; - if (c == '=') - return -1; /* illegal '=' while decoding base64 */ - value = base64_char_to_value[c] << 18; + if (c == '=' || !IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + value = BASE64_CHAR_TO_VALUE (c) << 18; - /* Process scond byte of a quadruplet. */ - NEXT_BASE64_CHAR (c, p); + /* Process second byte of a quadruplet. */ + NEXT_CHAR (c, p); if (!c) return -1; /* premature EOF while decoding base64 */ - if (c == '=') - return -1; /* illegal `=' while decoding base64 */ - value |= base64_char_to_value[c] << 12; + if (c == '=' || !IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + value |= BASE64_CHAR_TO_VALUE (c) << 12; *q++ = value >> 16; /* Process third byte of a quadruplet. */ - NEXT_BASE64_CHAR (c, p); + NEXT_CHAR (c, p); if (!c) return -1; /* premature EOF while decoding base64 */ + if (!IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ if (c == '=') { - NEXT_BASE64_CHAR (c, p); + NEXT_CHAR (c, p); if (!c) return -1; /* premature EOF while decoding base64 */ if (c != '=') @@ -1963,26 +2019,29 @@ base64_decode (const char *base64, char *to) continue; } - value |= base64_char_to_value[c] << 6; + value |= BASE64_CHAR_TO_VALUE (c) << 6; *q++ = 0xff & value >> 8; /* Process fourth byte of a quadruplet. */ - NEXT_BASE64_CHAR (c, p); + NEXT_CHAR (c, p); if (!c) return -1; /* premature EOF while decoding base64 */ if (c == '=') continue; + if (!IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ - value |= base64_char_to_value[c]; + value |= BASE64_CHAR_TO_VALUE (c); *q++ = 0xff & value; } +#undef IS_BASE64 +#undef BASE64_CHAR_TO_VALUE - return q - to; + return q - (char *) dest; } #undef IS_ASCII -#undef IS_BASE64 -#undef NEXT_BASE64_CHAR +#undef NEXT_CHAR /* Simple merge sort for use by stable_sort. Implementation courtesy Zeljko Vrba with additional debugging by Nenad Barbutov. */ @@ -2029,3 +2088,95 @@ stable_sort (void *base, size_t nmemb, size_t size, mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun); } } + +/* Print a decimal number. If it is equal to or larger than ten, the + number is rounded. Otherwise it is printed with one significant + digit without trailing zeros and with no more than three fractional + digits total. For example, 0.1 is printed as "0.1", 0.035 is + printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0". + + This is useful for displaying durations because it provides + order-of-magnitude information without unnecessary clutter -- + long-running downloads are shown without the fractional part, and + short ones still retain one significant digit. */ + +const char * +print_decimal (double number) +{ + static char buf[32]; + double n = number >= 0 ? number : -number; + + if (n >= 9.95) + /* Cut off at 9.95 because the below %.1f would round 9.96 to + "10.0" instead of "10". OTOH 9.94 will print as "9.9". */ + snprintf (buf, sizeof buf, "%.0f", number); + else if (n >= 0.95) + snprintf (buf, sizeof buf, "%.1f", number); + else if (n >= 0.001) + snprintf (buf, sizeof buf, "%.1g", number); + else if (n >= 0.0005) + /* round [0.0005, 0.001) to 0.001 */ + snprintf (buf, sizeof buf, "%.3f", number); + else + /* print numbers close to 0 as 0, not 0.000 */ + strcpy (buf, "0"); + + return buf; +} + +#ifdef TESTING + +const char * +test_subdir_p() +{ + int i; + struct { + char *d1; + char *d2; + bool result; + } test_array[] = { + { "/somedir", "/somedir", true }, + { "/somedir", "/somedir/d2", true }, + { "/somedir/d1", "/somedir", false }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res = subdir_p (test_array[i].d1, test_array[i].d2); + + mu_assert ("test_subdir_p: wrong result", + res == test_array[i].result); + } + + return NULL; +} + +const char * +test_dir_matches_p() +{ + int i; + struct { + char *dirlist[3]; + char *dir; + bool result; + } test_array[] = { + { { "/somedir", "/someotherdir", NULL }, "somedir", true }, + { { "/somedir", "/someotherdir", NULL }, "anotherdir", false }, + { { "/somedir", "/*otherdir", NULL }, "anotherdir", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "d1", false }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); + + mu_assert ("test_dir_matches_p: wrong result", + res == test_array[i].result); + } + + return NULL; +} + +#endif /* TESTING */ +