X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=90b50043c59772253924e49fce99b115f8dbe092;hb=4d7c5e087b2bc82c9f503dff003916d1047903ce;hp=2894d8f30a69b62253b3a5a83358e3f3d6322070;hpb=e65ff5729a876e9f0010ea3d00edb41cc0367a3e;p=wget diff --git a/src/utils.c b/src/utils.c index 2894d8f3..90b50043 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,11 +1,11 @@ /* Various utility functions. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 1996-2006 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget. If not, see <http://www.gnu.org/licenses/>. In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -31,24 +30,17 @@ so, delete this exception statement from your version. */ #include #include -#ifdef HAVE_STRING_H -# include -#else /* not HAVE_STRING_H */ -# include -#endif /* not HAVE_STRING_H */ -#include +#include +#include +#ifdef HAVE_SYS_TIME_H +# include +#endif #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_MMAP # include #endif -#ifdef HAVE_PWD_H -# include -#endif -#ifdef HAVE_LIMITS_H -# include -#endif #ifdef HAVE_UTIME_H # include #endif @@ -56,16 +48,10 @@ so, delete this exception statement from your version. 
*/ # include #endif #include -#ifdef NeXT -# include /* for access() */ -#endif #include #include -#ifdef WGET_USE_STDARG -# include -#else -# include -#endif +#include +#include /* For TIOCGWINSZ and friends: */ #ifdef HAVE_SYS_IOCTL_H @@ -75,14 +61,9 @@ so, delete this exception statement from your version. */ # include #endif -/* Needed for run_with_timeout. */ -#undef USE_SIGNAL_TIMEOUT -#ifdef HAVE_SIGNAL_H -# include -#endif -#ifdef HAVE_SETJMP_H -# include -#endif +/* Needed for Unix version of run_with_timeout. */ +#include +#include #ifndef HAVE_SIGSETJMP /* If sigsetjmp is a macro, configure won't pick it up. */ @@ -91,22 +72,17 @@ so, delete this exception statement from your version. */ # endif #endif -#ifdef HAVE_SIGNAL -# ifdef HAVE_SIGSETJMP -# define USE_SIGNAL_TIMEOUT -# endif -# ifdef HAVE_SIGBLOCK -# define USE_SIGNAL_TIMEOUT -# endif +#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK +# define USE_SIGNAL_TIMEOUT #endif #include "wget.h" #include "utils.h" #include "hash.h" -#ifndef errno -extern int errno; -#endif +#ifdef TESTING +#include "test.h" +#endif /* Utility function: like xstrdup(), but also lowercases S. 
*/ @@ -126,7 +102,7 @@ xstrdup_lower (const char *s) char * strdupdelim (const char *beg, const char *end) { - char *res = (char *)xmalloc (end - beg + 1); + char *res = xmalloc (end - beg + 1); memcpy (res, beg, end - beg); res[end - beg] = '\0'; return res; @@ -150,7 +126,7 @@ sepstring (const char *s) { if (*s == ',') { - res = (char **)xrealloc (res, (i + 2) * sizeof (char *)); + res = xrealloc (res, (i + 2) * sizeof (char *)); res[i] = strdupdelim (p, s); res[++i] = NULL; ++s; @@ -162,41 +138,55 @@ sepstring (const char *s) else ++s; } - res = (char **)xrealloc (res, (i + 2) * sizeof (char *)); + res = xrealloc (res, (i + 2) * sizeof (char *)); res[i] = strdupdelim (p, s); res[i + 1] = NULL; return res; } -#ifdef WGET_USE_STDARG -# define VA_START(args, arg1) va_start (args, arg1) -#else -# define VA_START(args, ignored) va_start (args) -#endif +/* Like sprintf, but prints into a string of sufficient size freshly + allocated with malloc, which is returned. If unable to print due + to invalid format, returns NULL. Inability to allocate needed + memory results in abort, as with xmalloc. This is in spirit + similar to the GNU/BSD extension asprintf, but somewhat easier to + use. -/* Like sprintf, but allocates a string of sufficient size with malloc - and returns it. GNU libc has a similar function named asprintf, - which requires the pointer to the string to be passed. */ + Internally the function either calls vasprintf or loops around + vsnprintf until the correct size is found. Since Wget also ships a + fallback implementation of vsnprintf, this should be portable. */ char * aprintf (const char *fmt, ...) { - /* This function is implemented using vsnprintf, which we provide - for the systems that don't have it. Therefore, it should be 100% - portable. */ +#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC + /* Use vasprintf. 
*/ + int ret; + va_list args; + char *str; + va_start (args, fmt); + ret = vasprintf (&str, fmt, args); + va_end (args); + if (ret < 0 && errno == ENOMEM) + abort (); /* for consistency with xmalloc/xrealloc */ + else if (ret < 0) + return NULL; + return str; +#else /* not HAVE_VASPRINTF */ + /* vasprintf is unavailable. snprintf into a small buffer and + resize it as necessary. */ int size = 32; char *str = xmalloc (size); + /* #### This code will infloop and eventually abort in xrealloc if + passed a FMT that causes snprintf to consistently return -1. */ + while (1) { int n; va_list args; - /* See log_vprintf_internal for explanation why it's OK to rely - on the return value of vsnprintf. */ - - VA_START (args, fmt); + va_start (args, fmt); n = vsnprintf (str, size, fmt, args); va_end (args); @@ -211,7 +201,7 @@ aprintf (const char *fmt, ...) size <<= 1; /* twice the old size */ str = xrealloc (str, size); } - return NULL; /* unreached */ +#endif /* not HAVE_VASPRINTF */ } /* Concatenate the NULL-terminated list of string arguments into @@ -231,7 +221,7 @@ concat_strings (const char *str0, ...) /* Calculate the length of and allocate the resulting string. */ argcount = 0; - VA_START (args, str0); + va_start (args, str0); for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) { int len = strlen (next_str); @@ -245,7 +235,7 @@ concat_strings (const char *str0, ...) /* Copy the strings into the allocated space. */ argcount = 0; - VA_START (args, str0); + va_start (args, str0); for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) { int len; @@ -262,51 +252,38 @@ concat_strings (const char *str0, ...) return ret; } +/* Format the provided time according to the specified format. The + format is a string with format elements supported by strftime. 
*/ + +static char * +fmttime (time_t t, const char *fmt) +{ + static char output[32]; + struct tm *tm = localtime(&t); + if (!tm) + abort (); + if (!strftime(output, sizeof(output), fmt, tm)) + abort (); + return output; +} + /* Return pointer to a static char[] buffer in which zero-terminated string-representation of TM (in form hh:mm:ss) is printed. If TM is NULL, the current time will be used. */ char * -time_str (time_t *tm) +time_str (time_t t) { - static char output[15]; - struct tm *ptm; - time_t secs = tm ? *tm : time (NULL); - - if (secs == -1) - { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; - } - ptm = localtime (&secs); - sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; + return fmttime(t, "%H:%M:%S"); } /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ char * -datetime_str (time_t *tm) +datetime_str (time_t t) { - static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */ - struct tm *ptm; - time_t secs = tm ? *tm : time (NULL); - - if (secs == -1) - { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; - } - ptm = localtime (&secs); - sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d", - ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday, - ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; + return fmttime(t, "%Y-%m-%d %H:%M:%S"); } /* The Windows versions of the following two functions are defined in @@ -318,7 +295,7 @@ fork_to_background (void) { pid_t pid; /* Whether we arrange our own version of opt.lfilename here. */ - int logfile_changed = 0; + bool logfile_changed = false; if (!opt.lfilename) { @@ -327,10 +304,10 @@ fork_to_background (void) use fopen_excl) or lying to the user about the log file name (which arises from using unique_name, printing the name, and using fopen_excl later on.) 
*/ - FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, 0, &opt.lfilename); + FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename); if (new_log_fp) { - logfile_changed = 1; + logfile_changed = true; fclose (new_log_fp); } } @@ -344,7 +321,7 @@ fork_to_background (void) else if (pid != 0) { /* parent, no error */ - printf (_("Continuing in background, pid %d.\n"), (int)pid); + printf (_("Continuing in background, pid %d.\n"), (int) pid); if (logfile_changed) printf (_("Output will be written to `%s'.\n"), opt.lfilename); exit (0); /* #### should we use _exit()? */ @@ -358,19 +335,23 @@ fork_to_background (void) } #endif /* not WINDOWS */ -/* "Touch" FILE, i.e. make its atime and mtime equal to the time - specified with TM. */ +/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time + specified with TM. The atime ("access time") is set to the current + time. */ + void touch (const char *file, time_t tm) { #ifdef HAVE_STRUCT_UTIMBUF struct utimbuf times; - times.actime = times.modtime = tm; #else - time_t times[2]; - times[0] = times[1] = tm; + struct { + time_t actime; + time_t modtime; + } times; #endif - + times.modtime = tm; + times.actime = time (NULL); if (utime (file, &times) == -1) logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); } @@ -401,7 +382,7 @@ remove_link (const char *file) proper way should, of course, be to have a third, error state, other than true/false, but that would introduce uncalled-for additional complexity to the callers. */ -int +bool file_exists_p (const char *filename) { #ifdef HAVE_ACCESS @@ -414,15 +395,15 @@ file_exists_p (const char *filename) /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file). Returns 0 on error. */ -int +bool file_non_directory_p (const char *path) { struct_stat buf; /* Use lstat() rather than stat() so that symbolic links pointing to directories can be identified correctly. */ if (lstat (path, &buf) != 0) - return 0; - return S_ISDIR (buf.st_mode) ? 
0 : 1; + return false; + return S_ISDIR (buf.st_mode) ? false : true; } /* Return the size of file named by FILENAME, or -1 if it cannot be @@ -490,7 +471,7 @@ unique_name_1 (const char *prefix) (and therefore doesn't need changing). */ char * -unique_name (const char *file, int allow_passthrough) +unique_name (const char *file, bool allow_passthrough) { /* If the FILE itself doesn't exist, return it without modification. */ @@ -508,15 +489,15 @@ unique_name (const char *file, int allow_passthrough) opening the file returned by unique_name. */ FILE * -unique_create (const char *name, int binary, char **opened_name) +unique_create (const char *name, bool binary, char **opened_name) { /* unique file name, based on NAME */ - char *uname = unique_name (name, 0); + char *uname = unique_name (name, false); FILE *fp; while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST) { xfree (uname); - uname = unique_name (name, 0); + uname = unique_name (name, false); } if (opened_name && fp != NULL) { @@ -544,7 +525,7 @@ unique_create (const char *name, int binary, char **opened_name) appropriately. */ FILE * -fopen_excl (const char *fname, int binary) +fopen_excl (const char *fname, bool binary) { int fd; #ifdef O_EXCL @@ -617,7 +598,7 @@ make_directory (const char *directory) file_merge("/foo/bar/", "baz") => "/foo/bar/baz" file_merge("foo", "bar") => "bar" - In other words, it's a simpler and gentler version of uri_merge_1. */ + In other words, it's a simpler and gentler version of uri_merge. 
*/ char * file_merge (const char *base, const char *file) @@ -628,7 +609,7 @@ file_merge (const char *base, const char *file) if (!cut) return xstrdup (file); - result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1); + result = xmalloc (cut - base + 1 + strlen (file) + 1); memcpy (result, base, cut - base); result[cut - base] = '/'; strcpy (result + (cut - base) + 1, file); @@ -636,11 +617,35 @@ file_merge (const char *base, const char *file) return result; } -static int in_acclist PARAMS ((const char *const *, const char *, int)); +/* Like fnmatch, but performs a case-insensitive match. */ + +int +fnmatch_nocase (const char *pattern, const char *string, int flags) +{ +#ifdef FNM_CASEFOLD + /* The FNM_CASEFOLD flag started as a GNU extension, but it is now + also present on *BSD platforms, and possibly elsewhere. */ + return fnmatch (pattern, string, flags | FNM_CASEFOLD); +#else + /* Turn PATTERN and STRING to lower case and call fnmatch on them. */ + char *patcopy = (char *) alloca (strlen (pattern) + 1); + char *strcopy = (char *) alloca (strlen (string) + 1); + char *p; + for (p = patcopy; *pattern; pattern++, p++) + *p = TOLOWER (*pattern); + *p = '\0'; + for (p = strcopy; *string; string++, p++) + *p = TOLOWER (*string); + *p = '\0'; + return fnmatch (patcopy, strcopy, flags); +#endif +} + +static bool in_acclist (const char *const *, const char *, bool); /* Determine whether a file is acceptable to be followed, according to lists of patterns to accept/reject. 
*/ -int +bool acceptable (const char *s) { int l = strlen (s); @@ -652,90 +657,103 @@ acceptable (const char *s) if (opt.accepts) { if (opt.rejects) - return (in_acclist ((const char *const *)opt.accepts, s, 1) - && !in_acclist ((const char *const *)opt.rejects, s, 1)); + return (in_acclist ((const char *const *)opt.accepts, s, true) + && !in_acclist ((const char *const *)opt.rejects, s, true)); else - return in_acclist ((const char *const *)opt.accepts, s, 1); + return in_acclist ((const char *const *)opt.accepts, s, true); } else if (opt.rejects) - return !in_acclist ((const char *const *)opt.rejects, s, 1); - return 1; + return !in_acclist ((const char *const *)opt.rejects, s, true); + return true; } -/* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is - `/something', frontcmp() will return 1 only if S2 begins with - `/something'. Otherwise, 0 is returned. */ -int -frontcmp (const char *s1, const char *s2) +/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() + will return true if and only if D2 begins with `/something/' or is exactly + '/something'. */ +bool +subdir_p (const char *d1, const char *d2) { - for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2); - return !*s1; + if (!opt.ignore_case) + for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2) + ; + else + for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2) + ; + + return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); } -/* Iterate through STRLIST, and return the first element that matches - S, through wildcards or front comparison (as appropriate). */ -static char * -proclist (char **strlist, const char *s, enum accd flags) +/* Iterate through DIRLIST (which must be NULL-terminated), and return the + first element that matches DIR, through wildcards or front comparison (as + appropriate). */ +static bool +dir_matches_p (char **dirlist, const char *dir) { char **x; + int (*matcher) (const char *, const char *, int) + = opt.ignore_case ? 
fnmatch_nocase : fnmatch; - for (x = strlist; *x; x++) - if (has_wildcards_p (*x)) - { - if (fnmatch (*x, s, FNM_PATHNAME) == 0) - break; - } - else - { - char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */ - if (frontcmp (p, s)) - break; - } - return *x; + for (x = dirlist; *x; x++) + { + /* Remove leading '/' */ + char *p = *x + (**x == '/'); + if (has_wildcards_p (p)) + { + if (matcher (p, dir, FNM_PATHNAME) == 0) + break; + } + else + { + if (subdir_p (p, dir)) + break; + } + } + + return *x ? true : false; } /* Returns whether DIRECTORY is acceptable for download, wrt the include/exclude lists. - If FLAGS is ALLABS, the leading `/' is ignored in paths; relative - and absolute paths may be freely intermixed. */ -int -accdir (const char *directory, enum accd flags) + The leading `/' is ignored in paths; relative and absolute paths + may be freely intermixed. */ + +bool +accdir (const char *directory) { /* Remove starting '/'. */ - if (flags & ALLABS && *directory == '/') + if (*directory == '/') ++directory; if (opt.includes) { - if (!proclist (opt.includes, directory, flags)) - return 0; + if (!dir_matches_p (opt.includes, directory)) + return false; } if (opt.excludes) { - if (proclist (opt.excludes, directory, flags)) - return 0; + if (dir_matches_p (opt.excludes, directory)) + return false; } - return 1; + return true; } -/* Return non-zero if STRING ends with TAIL. For instance: +/* Return true if STRING ends with TAIL. For instance: - match_tail ("abc", "bc", 0) -> 1 - match_tail ("abc", "ab", 0) -> 0 - match_tail ("abc", "abc", 0) -> 1 + match_tail ("abc", "bc", false) -> 1 + match_tail ("abc", "ab", false) -> 0 + match_tail ("abc", "abc", false) -> 1 - If FOLD_CASE_P is non-zero, the comparison will be - case-insensitive. */ + If FOLD_CASE is true, the comparison will be case-insensitive. 
*/ -int -match_tail (const char *string, const char *tail, int fold_case_p) +bool +match_tail (const char *string, const char *tail, bool fold_case) { int i, j; /* We want this to be fast, so we code two loops, one with case-folding, one without. */ - if (!fold_case_p) + if (!fold_case) { for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) if (string[i] != tail[j]) @@ -750,44 +768,47 @@ match_tail (const char *string, const char *tail, int fold_case_p) /* If the tail was exhausted, the match was succesful. */ if (j == -1) - return 1; + return true; else - return 0; + return false; } /* Checks whether string S matches each element of ACCEPTS. A list element are matched either with fnmatch() or match_tail(), according to whether the element contains wildcards or not. - If the BACKWARD is 0, don't do backward comparison -- just compare + If the BACKWARD is false, don't do backward comparison -- just compare them normally. */ -static int -in_acclist (const char *const *accepts, const char *s, int backward) +static bool +in_acclist (const char *const *accepts, const char *s, bool backward) { for (; *accepts; accepts++) { if (has_wildcards_p (*accepts)) { - /* fnmatch returns 0 if the pattern *does* match the - string. */ - if (fnmatch (*accepts, s, 0) == 0) - return 1; + int res = opt.ignore_case + ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0); + /* fnmatch returns 0 if the pattern *does* match the string. */ + if (res == 0) + return true; } else { if (backward) { - if (match_tail (s, *accepts, 0)) - return 1; + if (match_tail (s, *accepts, opt.ignore_case)) + return true; } else { - if (!strcmp (s, *accepts)) - return 1; + int cmp = opt.ignore_case + ? strcasecmp (s, *accepts) : strcmp (s, *accepts); + if (cmp == 0) + return true; } } } - return 0; + return false; } /* Return the location of STR's suffix (file extension). 
Examples: @@ -809,20 +830,21 @@ suffix (const char *str) return NULL; } -/* Return non-zero if S contains globbing wildcards (`*', `?', `[' or +/* Return true if S contains globbing wildcards (`*', `?', `[' or `]'). */ -int +bool has_wildcards_p (const char *s) { for (; *s; s++) if (*s == '*' || *s == '?' || *s == '[' || *s == ']') - return 1; - return 0; + return true; + return false; } -/* Return non-zero if FNAME ends with a typical HTML suffix. The - following (case-insensitive) suffixes are presumed to be HTML files: +/* Return true if FNAME ends with a typical HTML suffix. The + following (case-insensitive) suffixes are presumed to be HTML + files: html htm @@ -830,20 +852,20 @@ has_wildcards_p (const char *s) #### CAVEAT. This is not necessarily a good indication that FNAME refers to a file that contains HTML! */ -int +bool has_html_suffix_p (const char *fname) { char *suf; if ((suf = suffix (fname)) == NULL) - return 0; + return false; if (!strcasecmp (suf, "html")) - return 1; + return true; if (!strcasecmp (suf, "htm")) - return 1; + return true; if (suf[0] && !strcasecmp (suf + 1, "html")) - return 1; - return 0; + return true; + return false; } /* Read a line from FP and return the pointer to freshly allocated @@ -863,7 +885,7 @@ read_whole_line (FILE *fp) { int length = 0; int bufsize = 82; - char *line = (char *)xmalloc (bufsize); + char *line = xmalloc (bufsize); while (fgets (line + length, bufsize - length, fp)) { @@ -918,14 +940,14 @@ read_file (const char *file) int fd; struct file_memory *fm; long size; - int inhibit_close = 0; + bool inhibit_close = false; /* Some magic in the finest tradition of Perl and its kin: if FILE is "-", just use stdin. */ if (HYPHENP (file)) { fd = fileno (stdin); - inhibit_close = 1; + inhibit_close = true; /* Note that we don't inhibit mmap() in this case. If stdin is redirected from a regular file, mmap() will still work. 
*/ } @@ -937,7 +959,7 @@ read_file (const char *file) #ifdef HAVE_MMAP { - struct_stat buf; + struct_fstat buf; if (fstat (fd, &buf) < 0) goto mmap_lose; fm->length = buf.st_size; @@ -1071,15 +1093,41 @@ merge_vecs (char **v1, char **v2) return v1; } /* Count v1. */ - for (i = 0; v1[i]; i++); + for (i = 0; v1[i]; i++) + ; /* Count v2. */ - for (j = 0; v2[j]; j++); + for (j = 0; v2[j]; j++) + ; /* Reallocate v1. */ - v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **)); + v1 = xrealloc (v1, (i + j + 1) * sizeof (char **)); memcpy (v1 + i, v2, (j + 1) * sizeof (char *)); xfree (v2); return v1; } + +/* Append a freshly allocated copy of STR to VEC. If VEC is NULL, it + is allocated as needed. Return the new value of the vector. */ + +char ** +vec_append (char **vec, const char *str) +{ + int cnt; /* count of vector elements, including + the one we're about to append */ + if (vec != NULL) + { + for (cnt = 0; vec[cnt]; cnt++) + ; + ++cnt; + } + else + cnt = 1; + /* Reallocate the array to fit the new element and the NULL. */ + vec = xrealloc (vec, (cnt + 1) * sizeof (char *)); + /* Append a copy of STR to the vector. */ + vec[cnt - 1] = xstrdup (str); + vec[cnt] = NULL; + return vec; +} /* Sometimes it's useful to create "sets" of strings, i.e. special hash tables where you want to store strings as keys and merely @@ -1110,128 +1158,143 @@ string_set_contains (struct hash_table *ht, const char *s) return hash_table_contains (ht, s); } -static int -string_set_to_array_mapper (void *key, void *value_ignored, void *arg) -{ - char ***arrayptr = (char ***) arg; - *(*arrayptr)++ = (char *) key; - return 0; -} - /* Convert the specified string set to array. ARRAY should be large enough to hold hash_table_count(ht) char pointers. 
*/ void string_set_to_array (struct hash_table *ht, char **array) { - hash_table_map (ht, string_set_to_array_mapper, &array); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + *array++ = iter.key; } -static int -string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored) -{ - xfree (key); - return 0; -} +/* Free the string set. This frees both the storage allocated for + keys and the actual hash table. (hash_table_destroy would only + destroy the hash table.) */ void string_set_free (struct hash_table *ht) { - hash_table_map (ht, string_set_free_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + xfree (iter.key); hash_table_destroy (ht); } -static int -free_keys_and_values_mapper (void *key, void *value, void *arg_ignored) -{ - xfree (key); - xfree (value); - return 0; -} - -/* Another utility function: call free() on all keys and values of HT. */ +/* Utility function: simply call xfree() on all keys and values of HT. */ void free_keys_and_values (struct hash_table *ht) { - hash_table_map (ht, free_keys_and_values_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + { + xfree (iter.key); + xfree (iter.value); + } } - -/* Add thousand separators to a number already in string form. Used - by with_thousand_seps and with_thousand_seps_large. */ - -static char * -add_thousand_seps (const char *repr) -{ - static char outbuf[48]; - int i, i1, mod; - char *outptr; - const char *inptr; +/* Get digit grouping data for thousand separors by calling + localeconv(). The data includes separator string and grouping info + and is cached after the first call to the function. - /* Reset the pointers. */ - outptr = outbuf; - inptr = repr; + In locales that don't set a thousand separator (such as the "C" + locale), this forces it to be ",". 
We are now only showing + thousand separators in one place, so this shouldn't be a problem in + practice. */ - /* Ignore the sign for the purpose of adding thousand - separators. */ - if (*inptr == '-') - { - *outptr++ = '-'; - ++inptr; - } - /* How many digits before the first separator? */ - mod = strlen (inptr) % 3; - /* Insert them. */ - for (i = 0; i < mod; i++) - *outptr++ = inptr[i]; - /* Now insert the rest of them, putting separator before every - third digit. */ - for (i1 = i, i = 0; inptr[i1]; i++, i1++) +static void +get_grouping_data (const char **sep, const char **grouping) +{ + static const char *cached_sep; + static const char *cached_grouping; + static bool initialized; + if (!initialized) { - if (i % 3 == 0 && i1 != 0) - *outptr++ = ','; - *outptr++ = inptr[i1]; + /* Get the grouping info from the locale. */ + struct lconv *lconv = localeconv (); + cached_sep = lconv->thousands_sep; + cached_grouping = lconv->grouping; + if (!*cached_sep) + { + /* Many locales (such as "C" or "hr_HR") don't specify + grouping, which we still want to use it for legibility. + In those locales set the sep char to ',', unless that + character is used for decimal point, in which case set it + to ".". */ + if (*lconv->decimal_point != ',') + cached_sep = ","; + else + cached_sep = "."; + cached_grouping = "\x03"; + } + initialized = true; } - /* Zero-terminate the string. */ - *outptr = '\0'; - return outbuf; + *sep = cached_sep; + *grouping = cached_grouping; } -/* Return a static pointer to the number printed with thousand - separators inserted at the right places. */ +/* Return a printed representation of N with thousand separators. + This should respect locale settings, with the exception of the "C" + locale which mandates no separator, but we use one anyway. 
-char * -with_thousand_seps (wgint l) + Unfortunately, we cannot use %'d (in fact it would be %'j) to get + the separators because it's too non-portable, and it's hard to test + for this feature at configure time. Besides, it wouldn't display + separators in the "C" locale, still used by many Unix users. */ + +const char * +with_thousand_seps (wgint n) { - char inbuf[24]; - /* Print the number into the buffer. */ - number_to_string (inbuf, l); - return add_thousand_seps (inbuf); -} + static char outbuf[48]; + char *p = outbuf + sizeof outbuf; -/* Write a string representation of LARGE_INT NUMBER into the provided - buffer. + /* Info received from locale */ + const char *grouping, *sep; + int seplen; - It would be dangerous to use sprintf, because the code wouldn't - work on a machine with gcc-provided long long support, but without - libc support for "%lld". However, such old systems platforms - typically lack snprintf and will end up using our version, which - does support "%lld" whereever long longs are available. */ + /* State information */ + int i = 0, groupsize; + const char *atgroup; -static void -large_int_to_string (char *buffer, int bufsize, LARGE_INT number) -{ - snprintf (buffer, bufsize, LARGE_INT_FMT, number); -} + bool negative = n < 0; -/* The same as with_thousand_seps, but works on LARGE_INT. */ + /* Initialize grouping data. */ + get_grouping_data (&sep, &grouping); + seplen = strlen (sep); + atgroup = grouping; + groupsize = *atgroup++; -char * -with_thousand_seps_large (LARGE_INT l) -{ - char inbuf[48]; - large_int_to_string (inbuf, sizeof (inbuf), l); - return add_thousand_seps (inbuf); + /* This would overflow on WGINT_MIN, but printing negative numbers + is not an important goal of this fuinction. */ + if (negative) + n = -n; + + /* Write the number into the buffer, backwards, inserting the + separators as necessary. 
*/ + *--p = '\0'; + while (1) + { + *--p = n % 10 + '0'; + n /= 10; + if (n == 0) + break; + /* Prepend SEP to every groupsize'd digit and get new groupsize. */ + if (++i == groupsize) + { + if (seplen == 1) + *--p = *sep; + else + memcpy (p -= seplen, sep, seplen); + i = 0; + if (*atgroup) + groupsize = *atgroup++; + } + } + if (negative) + *--p = '-'; + + return p; } /* N, a byte quantity, is converted to a human-readable abberviated @@ -1245,14 +1308,13 @@ with_thousand_seps_large (LARGE_INT l) usually improves readability." This intentionally uses kilobyte (KB), megabyte (MB), etc. in their - original computer science meaning of "multiples of 1024". - Multiples of 1000 would be useless since Wget already adds thousand - separators for legibility. We don't use the "*bibyte" names - invented in 1998, and seldom used in practice. Wikipedia's entry - on kilobyte discusses this in some detail. */ + original computer-related meaning of "powers of 1024". We don't + use the "*bibyte" names invented in 1998, and seldom used in + practice. Wikipedia's entry on "binary prefix" discusses this in + some detail. */ char * -human_readable (wgint n) +human_readable (HR_NUMTYPE n) { /* These suffixes are compatible with those of GNU `ls -lh'. */ static char powers[] = @@ -1282,19 +1344,16 @@ human_readable (wgint n) /* At each iteration N is greater than the *subsequent* power. That way N/1024.0 produces a decimal number in the units of *this* power. */ - if ((n >> 10) < 1024 || i == countof (powers) - 1) + if ((n / 1024) < 1024 || i == countof (powers) - 1) { - /* Must cast to long first because MS VC can't directly cast - __int64 to double. (This is safe because N is known to - be <2**20.) */ - double val = (double) (long) n / 1024.0; + double val = n / 1024.0; /* Print values smaller than 10 with one decimal digits, and others without any decimals. */ snprintf (buf, sizeof (buf), "%.*f%c", val < 10 ? 
1 : 0, val, powers[i]); return buf; } - n >>= 10; + n /= 1024; } return NULL; /* unreached */ } @@ -1343,24 +1402,6 @@ numdigit (wgint number) #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10) #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10) -/* SPRINTF_WGINT is used by number_to_string to handle pathological - cases and to portably support strange sizes of wgint. Ideally this - would just use "%j" and intmax_t, but many systems don't support - it, so it's used only if nothing else works. */ -#if SIZEOF_LONG >= SIZEOF_WGINT -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n)) -#else -# if SIZEOF_LONG_LONG >= SIZEOF_WGINT -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n)) -# else -# ifdef WINDOWS -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64", (__int64) (n)) -# else -# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n)) -# endif -# endif -#endif - /* Shorthand for casting to wgint. */ #define W wgint @@ -1370,15 +1411,15 @@ numdigit (wgint number) The speedup may make a difference in programs that frequently convert numbers to strings. Some implementations of sprintf, - particularly the one in GNU libc, have been known to be extremely - slow when converting integers to strings. + particularly the one in some versions of GNU libc, have been known + to be quite slow when converting integers to strings. Return the pointer to the location where the terminating zero was printed. (Equivalent to calling buffer+strlen(buffer) after the function is done.) - BUFFER should be big enough to accept as many bytes as you expect - the number to take up. On machines with 64-bit longs the maximum + BUFFER should be large enough to accept as many bytes as you expect + the number to take up. On machines with 64-bit wgints the maximum needed size is 24 bytes. That includes the digits needed for the largest 64-bit number, the `-' sign in case it's negative, and the terminating '\0'. 
*/ @@ -1389,21 +1430,29 @@ number_to_string (char *buffer, wgint number) char *p = buffer; wgint n = number; + int last_digit_char = 0; + #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8) - /* We are running in a strange or misconfigured environment. Let - sprintf cope with it. */ - SPRINTF_WGINT (buffer, n); - p += strlen (buffer); + /* wgint is neither 4 nor 8 bytes wide. Leave the printing to + sprintf, converting through intmax_t ("%jd") into BUFFER. */ + p += sprintf (buffer, "%jd", (intmax_t) n); #else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ if (n < 0) { if (n < -WGINT_MAX) { - /* -n would overflow. Have sprintf deal with this. */ - SPRINTF_WGINT (buffer, n); - p += strlen (buffer); - return p; + /* n = -n would overflow because -n would evaluate to a + wgint value larger than WGINT_MAX. Need to make n + smaller and handle the last digit separately. */ + int last_digit = n % 10; + /* The sign of n%10 is implementation-defined. */ + if (last_digit < 0) + last_digit_char = '0' - last_digit; + else + last_digit_char = '0' + last_digit; + /* After n is made smaller, -n will not overflow. */ + n /= 10; } *p++ = '-'; @@ -1427,10 +1476,10 @@ number_to_string (char *buffer, wgint number) /* wgint is 32 bits wide: no number has more than 10 digits. */ else DIGITS_10 (1000000000); #else - /* wgint is 64 bits wide: handle numbers with more than 9 decimal - digits. Constants are constructed by compile-time multiplication - to avoid dealing with different notations for 64-bit constants - (nnnL, nnnLL, and nnnI64, depending on the compiler). */ + /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits. + Constants are constructed by compile-time multiplication to avoid + dealing with different notations for 64-bit constants + (nL/nLL/nI64, depending on the compiler and architecture). 
*/ else if (n < 10*(W)1000000000) DIGITS_10 (1000000000); else if (n < 100*(W)1000000000) DIGITS_11 (10*(W)1000000000); else if (n < 1000*(W)1000000000) DIGITS_12 (100*(W)1000000000); @@ -1443,6 +1492,9 @@ number_to_string (char *buffer, wgint number) else DIGITS_19 (1000000000*(W)1000000000); #endif + if (last_digit_char) + *p++ = last_digit_char; + *p = '\0'; #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ @@ -1451,6 +1503,7 @@ number_to_string (char *buffer, wgint number) #undef PR #undef W +#undef SPRINTF_WGINT #undef DIGITS_1 #undef DIGITS_2 #undef DIGITS_3 @@ -1515,352 +1568,6 @@ number_to_static_string (wgint number) return buf; } -/* Support for timers. */ - -#undef TIMER_WINDOWS -#undef TIMER_GETTIMEOFDAY -#undef TIMER_TIME - -/* Depending on the OS and availability of gettimeofday(), one and - only one of the above constants will be defined. Virtually all - modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will - use TIMER_WINDOWS. TIMER_TIME is a catch-all method for - non-Windows systems without gettimeofday. */ - -#ifdef WINDOWS -# define TIMER_WINDOWS -#else /* not WINDOWS */ -# ifdef HAVE_GETTIMEOFDAY -# define TIMER_GETTIMEOFDAY -# else -# define TIMER_TIME -# endif -#endif /* not WINDOWS */ - -#ifdef TIMER_GETTIMEOFDAY -typedef struct timeval wget_sys_time; -#endif - -#ifdef TIMER_TIME -typedef time_t wget_sys_time; -#endif - -#ifdef TIMER_WINDOWS -typedef union { - DWORD lores; /* In case GetTickCount is used */ - LARGE_INTEGER hires; /* In case high-resolution timer is used */ -} wget_sys_time; -#endif - -struct wget_timer { - /* Whether the start time has been initialized. */ - int initialized; - - /* The starting point in time which, subtracted from the current - time, yields elapsed time. */ - wget_sys_time start; - - /* The most recent elapsed time, calculated by wtimer_update(). - Measured in milliseconds. 
*/ - double elapsed_last; - - /* Approximately, the time elapsed between the true start of the - measurement and the time represented by START. */ - double elapsed_pre_start; -}; - -#ifdef TIMER_WINDOWS - -/* Whether high-resolution timers are used. Set by wtimer_initialize_once - the first time wtimer_allocate is called. */ -static int using_hires_timers; - -/* Frequency of high-resolution timers -- number of updates per - millisecond. Calculated the first time wtimer_allocate is called - provided that high-resolution timers are available. */ -static double hires_millisec_freq; - -/* The first time a timer is created, determine whether to use - high-resolution timers. */ - -static void -wtimer_initialize_once (void) -{ - static int init_done; - if (!init_done) - { - LARGE_INTEGER freq; - init_done = 1; - freq.QuadPart = 0; - QueryPerformanceFrequency (&freq); - if (freq.QuadPart != 0) - { - using_hires_timers = 1; - hires_millisec_freq = (double) freq.QuadPart / 1000.0; - } - } -} -#endif /* TIMER_WINDOWS */ - -/* Allocate a timer. Calling wtimer_read on the timer will return - zero. It is not legal to call wtimer_update with a freshly - allocated timer -- use wtimer_reset first. */ - -struct wget_timer * -wtimer_allocate (void) -{ - struct wget_timer *wt = xnew (struct wget_timer); - xzero (*wt); - -#ifdef TIMER_WINDOWS - wtimer_initialize_once (); -#endif - - return wt; -} - -/* Allocate a new timer and reset it. Return the new timer. */ - -struct wget_timer * -wtimer_new (void) -{ - struct wget_timer *wt = wtimer_allocate (); - wtimer_reset (wt); - return wt; -} - -/* Free the resources associated with the timer. Its further use is - prohibited. */ - -void -wtimer_delete (struct wget_timer *wt) -{ - xfree (wt); -} - -/* Store system time to WST. 
*/ - -static void -wtimer_sys_set (wget_sys_time *wst) -{ -#ifdef TIMER_GETTIMEOFDAY - gettimeofday (wst, NULL); -#endif - -#ifdef TIMER_TIME - time (wst); -#endif - -#ifdef TIMER_WINDOWS - if (using_hires_timers) - { - QueryPerformanceCounter (&wst->hires); - } - else - { - /* Where hires counters are not available, use GetTickCount rather - GetSystemTime, because it is unaffected by clock skew and simpler - to use. Note that overflows don't affect us because we never use - absolute values of the ticker, only the differences. */ - wst->lores = GetTickCount (); - } -#endif -} - -/* Reset timer WT. This establishes the starting point from which - wtimer_read() will return the number of elapsed milliseconds. - It is allowed to reset a previously used timer. */ - -void -wtimer_reset (struct wget_timer *wt) -{ - /* Set the start time to the current time. */ - wtimer_sys_set (&wt->start); - wt->elapsed_last = 0; - wt->elapsed_pre_start = 0; - wt->initialized = 1; -} - -static double -wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2) -{ -#ifdef TIMER_GETTIMEOFDAY - return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000 - + (double)(wst1->tv_usec - wst2->tv_usec) / 1000); -#endif - -#ifdef TIMER_TIME - return 1000 * (*wst1 - *wst2); -#endif - -#ifdef WINDOWS - if (using_hires_timers) - return (wst1->hires.QuadPart - wst2->hires.QuadPart) / hires_millisec_freq; - else - return wst1->lores - wst2->lores; -#endif -} - -/* Update the timer's elapsed interval. This function causes the - timer to call gettimeofday (or time(), etc.) to update its idea of - current time. To get the elapsed interval in milliseconds, use - wtimer_read. - - This function handles clock skew, i.e. time that moves backwards is - ignored. 
*/ - -void -wtimer_update (struct wget_timer *wt) -{ - wget_sys_time now; - double elapsed; - - assert (wt->initialized != 0); - - wtimer_sys_set (&now); - elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start); - - /* Ideally we'd just return the difference between NOW and - wt->start. However, the system timer can be set back, and we - could return a value smaller than when we were last called, even - a negative value. Both of these would confuse the callers, which - expect us to return monotonically nondecreasing values. - - Therefore: if ELAPSED is smaller than its previous known value, - we reset wt->start to the current time and effectively start - measuring from this point. But since we don't want the elapsed - value to start from zero, we set elapsed_pre_start to the last - elapsed time and increment all future calculations by that - amount. */ - - if (elapsed < wt->elapsed_last) - { - wt->start = now; - wt->elapsed_pre_start = wt->elapsed_last; - elapsed = wt->elapsed_last; - } - - wt->elapsed_last = elapsed; -} - -/* Return the elapsed time in milliseconds between the last call to - wtimer_reset and the last call to wtimer_update. - - A typical use of the timer interface would be: - - struct wtimer *timer = wtimer_new (); - ... do something that takes a while ... - wtimer_update (); - double msecs = wtimer_read (); */ - -double -wtimer_read (const struct wget_timer *wt) -{ - return wt->elapsed_last; -} - -/* Return the assessed granularity of the timer implementation, in - milliseconds. This is used by code that tries to substitute a - better value for timers that have returned zero. */ - -double -wtimer_granularity (void) -{ -#ifdef TIMER_GETTIMEOFDAY - /* Granularity of gettimeofday varies wildly between architectures. - However, it appears that on modern machines it tends to be better - than 1ms. Assume 100 usecs. (Perhaps the configure process - could actually measure this?) 
*/ - return 0.1; -#endif - -#ifdef TIMER_TIME - return 1000; -#endif - -#ifdef TIMER_WINDOWS - if (using_hires_timers) - return 1.0 / hires_millisec_freq; - else - return 10; /* according to MSDN */ -#endif -} - -/* This should probably be at a better place, but it doesn't really - fit into html-parse.c. */ - -/* The function returns the pointer to the malloc-ed quoted version of - string s. It will recognize and quote numeric and special graphic - entities, as per RFC1866: - - `&' -> `&' - `<' -> `<' - `>' -> `>' - `"' -> `"' - SP -> ` ' - - No other entities are recognized or replaced. */ -char * -html_quote_string (const char *s) -{ - const char *b = s; - char *p, *res; - int i; - - /* Pass through the string, and count the new size. */ - for (i = 0; *s; s++, i++) - { - if (*s == '&') - i += 4; /* `amp;' */ - else if (*s == '<' || *s == '>') - i += 3; /* `lt;' and `gt;' */ - else if (*s == '\"') - i += 5; /* `quot;' */ - else if (*s == ' ') - i += 4; /* #32; */ - } - res = (char *)xmalloc (i + 1); - s = b; - for (p = res; *s; s++) - { - switch (*s) - { - case '&': - *p++ = '&'; - *p++ = 'a'; - *p++ = 'm'; - *p++ = 'p'; - *p++ = ';'; - break; - case '<': case '>': - *p++ = '&'; - *p++ = (*s == '<' ? 'l' : 'g'); - *p++ = 't'; - *p++ = ';'; - break; - case '\"': - *p++ = '&'; - *p++ = 'q'; - *p++ = 'u'; - *p++ = 'o'; - *p++ = 't'; - *p++ = ';'; - break; - case ' ': - *p++ = '&'; - *p++ = '#'; - *p++ = '3'; - *p++ = '2'; - *p++ = ';'; - break; - default: - *p++ = *s; - } - } - *p = '\0'; - return res; -} - /* Determine the width of the terminal we're running on. If that's not possible, return 0. 
*/ @@ -1881,79 +1588,83 @@ determine_screen_width (void) return 0; /* most likely ENOTTY */ return wsz.ws_col; -#else /* not TIOCGWINSZ */ -# ifdef WINDOWS +#elif defined(WINDOWS) CONSOLE_SCREEN_BUFFER_INFO csbi; if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi)) return 0; return csbi.dwSize.X; -# else /* neither WINDOWS nor TIOCGWINSZ */ +#else /* neither TIOCGWINSZ nor WINDOWS */ return 0; -#endif /* neither WINDOWS nor TIOCGWINSZ */ -#endif /* not TIOCGWINSZ */ +#endif /* neither TIOCGWINSZ nor WINDOWS */ } + +/* Whether the rnd system (either rand or [dl]rand48) has been + seeded. */ +static int rnd_seeded; /* Return a random number between 0 and MAX-1, inclusive. - If MAX is greater than the value of RAND_MAX+1 on the system, the - returned value will be in the range [0, RAND_MAX]. This may be - fixed in a future release. - + If the system does not support lrand48 and MAX is greater than the + value of RAND_MAX+1 on the system, the returned value will be in + the range [0, RAND_MAX]. This may be fixed in a future release. The random number generator is seeded automatically the first time it is called. - This uses rand() for portability. It has been suggested that - random() offers better randomness, but this is not required for - Wget, so I chose to go for simplicity and use rand - unconditionally. - - DO NOT use this for cryptographic purposes. It is only meant to be - used in situations where quality of the random numbers returned - doesn't really matter. */ + This uses lrand48 where available, rand elsewhere. DO NOT use it + for cryptography. It is only meant to be used in situations where + quality of the random numbers returned doesn't really matter. 
*/ int random_number (int max) { - static int seeded; +#ifdef HAVE_DRAND48 + if (!rnd_seeded) + { + srand48 ((long) time (NULL) ^ (long) getpid ()); + rnd_seeded = 1; + } + return lrand48 () % max; +#else /* not HAVE_DRAND48 */ + double bounded; int rnd; - - if (!seeded) + if (!rnd_seeded) { - srand (time (NULL)); - seeded = 1; + srand ((unsigned) time (NULL) ^ (unsigned) getpid ()); + rnd_seeded = 1; } rnd = rand (); - /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1, - and enforce that assumption by masking other bits. */ -#ifndef RAND_MAX -# define RAND_MAX 32767 - rnd &= RAND_MAX; -#endif + /* Like rand() % max, but uses the high-order bits for better + randomness on architectures where rand() is implemented using a + simple congruential generator. */ - /* This is equivalent to rand() % max, but uses the high-order bits - for better randomness on architecture where rand() is implemented - using a simple congruential generator. */ + bounded = (double) max * rnd / (RAND_MAX + 1.0); + return (int) bounded; - bounded = (double)max * rnd / (RAND_MAX + 1.0); - return (int)bounded; +#endif /* not HAVE_DRAND48 */ } /* Return a random uniformly distributed floating point number in the - [0, 1) range. The precision of returned numbers is 9 digits. - - Modify this to use erand48() where available! */ + [0, 1) range. Uses drand48 where available, and a really lame + kludge elsewhere. */ double random_float (void) { - /* We can't rely on any specific value of RAND_MAX, but I'm pretty - sure it's greater than 1000. 
*/ - int rnd1 = random_number (1000); - int rnd2 = random_number (1000); - int rnd3 = random_number (1000); - return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0; +#ifdef HAVE_DRAND48 + if (!rnd_seeded) + { + srand48 ((long) time (NULL) ^ (long) getpid ()); + rnd_seeded = 1; + } + return drand48 (); +#else /* not HAVE_DRAND48 */ + return ( random_number (10000) / 10000.0 + + random_number (10000) / (10000.0 * 10000.0) + + random_number (10000) / (10000.0 * 10000.0 * 10000.0) + + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0)); +#endif /* not HAVE_DRAND48 */ } /* Implementation of run_with_timeout, a generic timeout-forcing @@ -1965,7 +1676,7 @@ random_float (void) static sigjmp_buf run_with_timeout_env; -static RETSIGTYPE +static void abort_run_with_timeout (int sig) { assert (sig == SIGALRM); @@ -1976,7 +1687,7 @@ abort_run_with_timeout (int sig) static jmp_buf run_with_timeout_env; -static RETSIGTYPE +static void abort_run_with_timeout (int sig) { assert (sig == SIGALRM); @@ -2042,8 +1753,8 @@ alarm_cancel (void) } /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT - seconds. Returns non-zero if the function was interrupted with a - timeout, zero otherwise. + seconds. Returns true if the function was interrupted with a + timeout, false otherwise. This works by setting up SIGALRM to be delivered in TIMEOUT seconds using setitimer() or alarm(). The timeout is enforced by @@ -2068,7 +1779,7 @@ alarm_cancel (void) are normally freed prior to exit from the functions, they will be lost in case of timeout. */ -int +bool run_with_timeout (double timeout, void (*fun) (void *), void *arg) { int saved_errno; @@ -2076,7 +1787,7 @@ run_with_timeout (double timeout, void (*fun) (void *), void *arg) if (timeout == 0) { fun (arg); - return 0; + return false; } signal (SIGALRM, abort_run_with_timeout); @@ -2084,7 +1795,7 @@ run_with_timeout (double timeout, void (*fun) (void *), void *arg) { /* Longjumped out of FUN with a timeout. 
*/ signal (SIGALRM, SIG_DFL); - return 1; + return true; } alarm_set (timeout); fun (arg); @@ -2095,7 +1806,7 @@ run_with_timeout (double timeout, void (*fun) (void *), void *arg) signal (SIGALRM, SIG_DFL); errno = saved_errno; - return 0; + return false; } #else /* not USE_SIGNAL_TIMEOUT */ @@ -2109,7 +1820,7 @@ int run_with_timeout (double timeout, void (*fun) (void *), void *arg) { fun (arg); - return 0; + return false; } #endif /* not WINDOWS */ #endif /* not USE_SIGNAL_TIMEOUT */ @@ -2135,8 +1846,7 @@ xsleep (double seconds) /* If nanosleep has been interrupted by a signal, adjust the sleeping period and return to sleep. */ sleep = remaining; -#else /* not HAVE_NANOSLEEP */ -#ifdef HAVE_USLEEP +#elif defined(HAVE_USLEEP) /* If usleep is available, use it in preference to select. */ if (seconds >= 1) { @@ -2147,13 +1857,11 @@ xsleep (double seconds) seconds -= (long) seconds; } usleep (seconds * 1000000); -#else /* not HAVE_USLEEP */ -#ifdef HAVE_SELECT - /* Note that, although Windows supports select, this sleeping - strategy doesn't work there because Winsock's select doesn't - implement timeout when it is passed NULL pointers for all fd - sets. (But it does work under Cygwin, which implements its own - select.) */ +#else /* fall back to select */ + /* Note that, although Windows supports select, it can't be used to + implement sleeping because Winsock's select doesn't implement + timeout when it is passed NULL pointers for all fd sets. (But it + does under Cygwin, which implements Unix-compatible select.) */ struct timeval sleep; sleep.tv_sec = (long) seconds; sleep.tv_usec = 1000000 * (seconds - (long) seconds); @@ -2162,11 +1870,312 @@ xsleep (double seconds) interrupted by a signal. But without knowing how long we've actually slept, we can't return to sleep. Using gettimeofday to track sleeps is slow and unreliable due to clock skew. 
*/ -#else /* not HAVE_SELECT */ - sleep (seconds); -#endif /* not HAVE_SELECT */ -#endif /* not HAVE_USLEEP */ -#endif /* not HAVE_NANOSLEEP */ +#endif } #endif /* not WINDOWS */ + +/* Encode the octets in DATA of length LENGTH to base64 format, + storing the result to DEST. The output will be zero-terminated, + and must point to a writable buffer of at least + 1+BASE64_LENGTH(length) bytes. The function returns the length of + the resulting base64 data, not counting the terminating zero. + + This implementation does not emit newlines after 76 characters of + base64 data. */ + +int +base64_encode (const void *data, int length, char *dest) +{ + /* Conversion table. */ + static const char tbl[64] = { + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', + 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', + 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', + 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' + }; + /* Access bytes in DATA as unsigned char, otherwise the shifts below + don't work for data with MSB set. */ + const unsigned char *s = data; + /* Theoretical ANSI violation when length < 3. */ + const unsigned char *end = (const unsigned char *) data + length - 2; + char *p = dest; + + /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ + for (; s < end; s += 3) + { + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; + *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; + *p++ = tbl[s[2] & 0x3f]; + } + + /* Pad the result if necessary... */ + switch (length % 3) + { + case 1: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[(s[0] & 3) << 4]; + *p++ = '='; + *p++ = '='; + break; + case 2: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; + *p++ = tbl[((s[1] & 0xf) << 2)]; + *p++ = '='; + break; + } + /* ...and zero-terminate it. 
*/ + *p = '\0'; + + return p - dest; +} + +/* Store in C the next non-whitespace character from the string, or \0 + when end of string is reached. */ +#define NEXT_CHAR(c, p) do { \ + c = (unsigned char) *p++; \ +} while (ISSPACE (c)) + +#define IS_ASCII(c) (((c) & 0x80) == 0) + +/* Decode data from BASE64 (a null-terminated string) into memory + pointed to by DEST. DEST is assumed to be large enough to + accommodate the decoded data, which is guaranteed to be no more than + 3/4*strlen(base64). + + Since DEST is assumed to contain binary data, it is not + NUL-terminated. The function returns the length of the data + written to DEST. -1 is returned in case of error caused by malformed + base64 input. + + This function originates from Free Recode. */ + +int +base64_decode (const char *base64, void *dest) +{ + /* Table of base64 values for first 128 characters. Note that this + assumes ASCII (but so does Wget in other places). */ + static const signed char base64_char_to_value[128] = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */ + -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */ + 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */ + -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */ + 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */ + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */ + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */ + 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */ + }; +#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c]) +#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=') + + const char *p = base64; + char *q = dest; + + while (1) + { + unsigned char c; + unsigned long value; + + /* Process first byte of a quadruplet. 
*/ + NEXT_CHAR (c, p); + if (!c) + break; + if (c == '=' || !IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + value = BASE64_CHAR_TO_VALUE (c) << 18; + + /* Process second byte of a quadruplet. */ + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (c == '=' || !IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + value |= BASE64_CHAR_TO_VALUE (c) << 12; + *q++ = value >> 16; + + /* Process third byte of a quadruplet. */ + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (!IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + + if (c == '=') + { + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (c != '=') + return -1; /* padding `=' expected but not found */ + continue; + } + + value |= BASE64_CHAR_TO_VALUE (c) << 6; + *q++ = 0xff & value >> 8; + + /* Process fourth byte of a quadruplet. */ + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (c == '=') + continue; + if (!IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + + value |= BASE64_CHAR_TO_VALUE (c); + *q++ = 0xff & value; + } +#undef IS_BASE64 +#undef BASE64_CHAR_TO_VALUE + + return q - (char *) dest; +} + +#undef IS_ASCII +#undef NEXT_CHAR + +/* Simple merge sort for use by stable_sort. Implementation courtesy + Zeljko Vrba with additional debugging by Nenad Barbutov. 
*/ + +static void +mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to, + int (*cmpfun) (const void *, const void *)) +{ +#define ELT(array, pos) ((char *)(array) + (pos) * size) + if (from < to) + { + size_t i, j, k; + size_t mid = (to + from) / 2; + mergesort_internal (base, temp, size, from, mid, cmpfun); + mergesort_internal (base, temp, size, mid + 1, to, cmpfun); + i = from; + j = mid + 1; + for (k = from; (i <= mid) && (j <= to); k++) + if (cmpfun (ELT (base, i), ELT (base, j)) <= 0) + memcpy (ELT (temp, k), ELT (base, i++), size); + else + memcpy (ELT (temp, k), ELT (base, j++), size); + while (i <= mid) + memcpy (ELT (temp, k++), ELT (base, i++), size); + while (j <= to) + memcpy (ELT (temp, k++), ELT (base, j++), size); + for (k = from; k <= to; k++) + memcpy (ELT (base, k), ELT (temp, k), size); + } +#undef ELT +} + +/* Stable sort with interface exactly like standard library's qsort. + Uses mergesort internally, with NMEMB * SIZE bytes of scratch space + from alloca. Arrays of fewer than two elements are left untouched. */ + +void +stable_sort (void *base, size_t nmemb, size_t size, + int (*cmpfun) (const void *, const void *)) +{ + if (nmemb > 1 && size > 0) + { + void *temp = alloca (nmemb * size); + mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun); + } +} + +/* Print a decimal number. If it is equal to or larger than ten, the + number is rounded. Otherwise it is printed with one significant + digit without trailing zeros and with no more than three fractional + digits total. For example, 0.1 is printed as "0.1", 0.035 is + printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0". + + This is useful for displaying durations because it provides + order-of-magnitude information without unnecessary clutter -- + long-running downloads are shown without the fractional part, and + short ones still retain one significant digit. */ + +const char * +print_decimal (double number) +{ + static char buf[32]; + double n = number >= 0 ? 
number : -number; + + if (n >= 9.95) + /* Cut off at 9.95 because the below %.1f would round 9.96 to + "10.0" instead of "10". OTOH 9.94 will print as "9.9". */ + snprintf (buf, sizeof buf, "%.0f", number); + else if (n >= 0.95) + snprintf (buf, sizeof buf, "%.1f", number); + else if (n >= 0.001) + snprintf (buf, sizeof buf, "%.1g", number); + else if (n >= 0.0005) + /* round [0.0005, 0.001) to 0.001 */ + snprintf (buf, sizeof buf, "%.3f", number); + else + /* print numbers close to 0 as 0, not 0.000 */ + strcpy (buf, "0"); + + return buf; +} + +#ifdef TESTING + +const char * +test_subdir_p() +{ + int i; + struct { + char *d1; + char *d2; + bool result; + } test_array[] = { + { "/somedir", "/somedir", true }, + { "/somedir", "/somedir/d2", true }, + { "/somedir/d1", "/somedir", false }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res = subdir_p (test_array[i].d1, test_array[i].d2); + + mu_assert ("test_subdir_p: wrong result", + res == test_array[i].result); + } + + return NULL; +} + +const char * +test_dir_matches_p() +{ + int i; + struct { + char *dirlist[3]; + char *dir; + bool result; + } test_array[] = { + { { "/somedir", "/someotherdir", NULL }, "somedir", true }, + { { "/somedir", "/someotherdir", NULL }, "anotherdir", false }, + { { "/somedir", "/*otherdir", NULL }, "anotherdir", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "d1", false }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); + + mu_assert ("test_dir_matches_p: wrong result", + res == test_array[i].result); + } + + return NULL; +} + +#endif /* TESTING */ +