X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=74e4552fd4eeb5c67e58085a3e741a24ea42267f;hb=9228f0bf53d3b42459daeb28372196a007de3014;hp=2c6fd784570ed187ed54f424cd743716d06443a4;hpb=1365950c01a87e8ede70da4d3937d379e3050d13;p=wget diff --git a/src/utils.c b/src/utils.c index 2c6fd784..74e4552f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -16,7 +16,17 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include @@ -59,6 +69,27 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ # include #endif +/* Needed for run_with_timeout. */ +#undef USE_SIGNAL_TIMEOUT +#ifdef HAVE_SIGNAL_H +# include +#endif +#ifdef HAVE_SETJMP_H +# include +#endif +/* If sigsetjmp is a macro, configure won't pick it up. */ +#ifdef sigsetjmp +# define HAVE_SIGSETJMP +#endif +#ifdef HAVE_SIGNAL +# ifdef HAVE_SIGSETJMP +# define USE_SIGNAL_TIMEOUT +# endif +# ifdef HAVE_SIGBLOCK +# define USE_SIGNAL_TIMEOUT +# endif +#endif + #include "wget.h" #include "utils.h" #include "fnmatch.h" @@ -466,126 +497,6 @@ fork_to_background (void) } #endif /* not WINDOWS */ -/* Resolve "." and ".." elements of PATH by destructively modifying - PATH. 
"." is resolved by removing that path element, and ".." is - resolved by removing the preceding path element. Leading and - trailing slashes are preserved. - - Return non-zero if any changes have been made. - - For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive - test examples are provided below. If you change anything in this - function, run test_path_simplify to make sure you haven't broken a - test case. - - A previous version of this function was based on path_simplify() - from GNU Bash, but it has been rewritten for Wget 1.8.1. */ - -int -path_simplify (char *path) -{ - int change = 0; - char *p, *end; - - if (path[0] == '/') - ++path; /* preserve the leading '/'. */ - - p = path; - end = p + strlen (p) + 1; /* position past the terminating zero. */ - - while (1) - { - again: - /* P should point to the beginning of a path element. */ - - if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0')) - { - /* Handle "./foo" by moving "foo" two characters to the - left. */ - if (*(p + 1) == '/') - { - change = 1; - memmove (p, p + 2, end - p); - end -= 2; - goto again; - } - else - { - change = 1; - *p = '\0'; - break; - } - } - else if (*p == '.' && *(p + 1) == '.' - && (*(p + 2) == '/' || *(p + 2) == '\0')) - { - /* Handle "../foo" by moving "foo" one path element to the - left. */ - char *b = p; /* not p-1 because P can equal PATH */ - - /* Backtrack by one path element, but not past the beginning - of PATH. */ - - /* foo/bar/../baz */ - /* ^ p */ - /* ^ b */ - - if (b > path) - { - /* Move backwards until B hits the beginning of the - previous path element or the beginning of path. */ - for (--b; b > path && *(b - 1) != '/'; b--) - ; - } - - change = 1; - if (*(p + 2) == '/') - { - memmove (b, p + 3, end - (p + 3)); - end -= (p + 3) - b; - p = b; - } - else - { - *b = '\0'; - break; - } - - goto again; - } - else if (*p == '/') - { - /* Remove empty path elements. 
Not mandated by rfc1808 et - al, but empty path elements are not all that useful, and - the rest of Wget might not deal with them well. */ - char *q = p; - while (*q == '/') - ++q; - change = 1; - if (*q == '\0') - { - *p = '\0'; - break; - } - memmove (p, q, end - q); - end -= q - p; - goto again; - } - - /* Skip to the next path element. */ - while (*p && *p != '/') - ++p; - if (*p == '\0') - break; - - /* Make sure P points to the beginning of the next path element, - which is location after the slash. */ - ++p; - } - - return change; -} - /* "Touch" FILE, i.e. make its atime and mtime equal to the time specified with TM. */ void @@ -653,6 +564,24 @@ file_non_directory_p (const char *path) return S_ISDIR (buf.st_mode) ? 0 : 1; } +/* Return the size of file named by FILENAME, or -1 if it cannot be + opened or seeked into. */ +long +file_size (const char *filename) +{ + long size; + /* We use fseek rather than stat to determine the file size because + that way we can also verify whether the file is readable. + Inspired by the POST patch by Arnaud Wylie. */ + FILE *fp = fopen (filename, "rb"); + if (!fp) + return -1; + fseek (fp, 0, SEEK_END); + size = ftell (fp); + fclose (fp); + return size; +} + /* Return a unique filename, given a prefix and count */ static char * unique_name_1 (const char *fileprefix, int count) @@ -699,6 +626,7 @@ make_directory (const char *directory) { int quit = 0; int i; + int ret = 0; char *dir; /* Make a copy of dir, to be able to write to it. Otherwise, the @@ -714,18 +642,19 @@ make_directory (const char *directory) if (!dir[i]) quit = 1; dir[i] = '\0'; - /* Check whether the directory already exists. */ + /* Check whether the directory already exists. Allow creation of + intermediate directories to fail, as the initial path components + are not necessarily directories!
*/ if (!file_exists_p (dir)) - { - if (mkdir (dir, 0777) < 0) - return -1; - } + ret = mkdir (dir, 0777); + else + ret = 0; if (quit) break; else dir[i] = '/'; } - return 0; + return ret; } /* Merge BASE with FILE. BASE can be a directory or a file name, FILE @@ -836,20 +765,37 @@ accdir (const char *directory, enum accd flags) return 1; } -/* Match the end of STRING against PATTERN. For instance: +/* Return non-zero if STRING ends with TAIL. For instance: + + match_tail ("abc", "bc", 0) -> 1 + match_tail ("abc", "ab", 0) -> 0 + match_tail ("abc", "abc", 0) -> 1 + + If FOLD_CASE_P is non-zero, the comparison will be + case-insensitive. */ - match_backwards ("abc", "bc") -> 1 - match_backwards ("abc", "ab") -> 0 - match_backwards ("abc", "abc") -> 1 */ int -match_tail (const char *string, const char *pattern) +match_tail (const char *string, const char *tail, int fold_case_p) { int i, j; - for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--) - if (string[i] != pattern[j]) - break; - /* If the pattern was exhausted, the match was succesful. */ + /* We want this to be fast, so we code two loops, one with + case-folding, one without. */ + + if (!fold_case_p) + { + for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) + if (string[i] != tail[j]) + break; + } + else + { + for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) + if (TOLOWER (string[i]) != TOLOWER (tail[j])) + break; + } + + /* If the tail was exhausted, the match was succesful. */ if (j == -1) return 1; else @@ -878,7 +824,7 @@ in_acclist (const char *const *accepts, const char *s, int backward) { if (backward) { - if (match_tail (s, *accepts)) + if (match_tail (s, *accepts, 0)) return 1; } else @@ -910,6 +856,31 @@ suffix (const char *str) return NULL; } +/* Return non-zero if FNAME ends with a typical HTML suffix. The + following (case-insensitive) suffixes are presumed to be HTML files: + + html + htm + ?html (`?' 
matches one character) + + #### CAVEAT. This is not necessarily a good indication that FNAME + refers to a file that contains HTML! */ +int +has_html_suffix_p (const char *fname) +{ + char *suf; + + if ((suf = suffix (fname)) == NULL) + return 0; + if (!strcasecmp (suf, "html")) + return 1; + if (!strcasecmp (suf, "htm")) + return 1; + if (suf[0] && !strcasecmp (suf + 1, "html")) + return 1; + return 0; +} + /* Read a line from FP and return the pointer to freshly allocated storage. The stoarage space is obtained through malloc() and should be freed with free() when it is no longer needed. @@ -1403,6 +1374,12 @@ numdigit (long number) return cnt; } +/* A half-assed implementation of INT_MAX on machines that don't + bother to define one. */ +#ifndef INT_MAX +# define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1)) +#endif + #define ONE_DIGIT(figure) *p++ = n / (figure) + '0' #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure)) @@ -1462,6 +1439,15 @@ number_to_string (char *buffer, long number) if (n < 0) { + if (n < -INT_MAX) + { + /* We cannot print a '-' and assign -n to n because -n would + overflow. Let sprintf deal with this border case. */ + sprintf (buffer, "%ld", n); + p += strlen (buffer); + return p; + } + *p++ = '-'; n = -n; } @@ -1546,19 +1532,30 @@ number_to_string (char *buffer, long number) # endif #endif /* not WINDOWS */ -struct wget_timer { #ifdef TIMER_GETTIMEOFDAY - long secs; - long usecs; +typedef struct timeval wget_sys_time; #endif #ifdef TIMER_TIME - time_t secs; +typedef time_t wget_sys_time; #endif #ifdef TIMER_WINDOWS - ULARGE_INTEGER wintime; +typedef ULARGE_INTEGER wget_sys_time; #endif + +struct wget_timer { + /* The starting point in time which, subtracted from the current + time, yields elapsed time. */ + wget_sys_time start; + + /* The most recent elapsed time, calculated by wtimer_elapsed(). + Measured in milliseconds. 
*/ + double elapsed_last; + + /* Approximately, the time elapsed between the true start of the + measurement and the time represented by START. */ + double elapsed_pre_start; }; /* Allocate a timer. It is not legal to do anything with a freshly @@ -1591,76 +1588,121 @@ wtimer_delete (struct wget_timer *wt) xfree (wt); } -/* Reset timer WT. This establishes the starting point from which - wtimer_elapsed() will return the number of elapsed - milliseconds. It is allowed to reset a previously used timer. */ +/* Store system time to WST. */ -void -wtimer_reset (struct wget_timer *wt) +static void +wtimer_sys_set (wget_sys_time *wst) { #ifdef TIMER_GETTIMEOFDAY - struct timeval t; - gettimeofday (&t, NULL); - wt->secs = t.tv_sec; - wt->usecs = t.tv_usec; + gettimeofday (wst, NULL); #endif #ifdef TIMER_TIME - wt->secs = time (NULL); + time (wst); #endif #ifdef TIMER_WINDOWS + /* We use GetSystemTime to get the elapsed time. MSDN warns that + system clock adjustments can skew the output of GetSystemTime + when used as a timer and gives preference to GetTickCount and + high-resolution timers. But GetTickCount can overflow, and hires + timers are typically used for profiling, not for regular time + measurement. Since we handle clock skew anyway, we just use + GetSystemTime. */ FILETIME ft; SYSTEMTIME st; GetSystemTime (&st); + + /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy + FILETIME to ULARGE_INTEGER, and use regular 64-bit integer + arithmetic on that. */ SystemTimeToFileTime (&st, &ft); - wt->wintime.HighPart = ft.dwHighDateTime; - wt->wintime.LowPart = ft.dwLowDateTime; + wst->HighPart = ft.dwHighDateTime; + wst->LowPart = ft.dwLowDateTime; #endif } -/* Return the number of milliseconds elapsed since the timer was last - reset. It is allowed to call this function more than once to get - increasingly higher elapsed values. */ +/* Reset timer WT. 
This establishes the starting point from which + wtimer_elapsed() will return the number of elapsed + milliseconds. It is allowed to reset a previously used timer. */ -long -wtimer_elapsed (struct wget_timer *wt) +void +wtimer_reset (struct wget_timer *wt) +{ + /* Set the start time to the current time. */ + wtimer_sys_set (&wt->start); + wt->elapsed_last = 0; + wt->elapsed_pre_start = 0; +} + +static double +wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2) { #ifdef TIMER_GETTIMEOFDAY - struct timeval t; - gettimeofday (&t, NULL); - return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000; + return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000 + + (double)(wst1->tv_usec - wst2->tv_usec) / 1000); #endif #ifdef TIMER_TIME - time_t now = time (NULL); - return 1000 * (now - wt->secs); + return 1000 * (*wst1 - *wst2); #endif #ifdef WINDOWS - FILETIME ft; - SYSTEMTIME st; - ULARGE_INTEGER uli; - GetSystemTime (&st); - SystemTimeToFileTime (&st, &ft); - uli.HighPart = ft.dwHighDateTime; - uli.LowPart = ft.dwLowDateTime; - return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000); + return (double)(wst1->QuadPart - wst2->QuadPart) / 10000; #endif } -/* Return the assessed granularity of the timer implementation. This - is important for certain code that tries to deal with "zero" time - intervals. */ +/* Return the number of milliseconds elapsed since the timer was last + reset. It is allowed to call this function more than once to get + increasingly higher elapsed values. These timers handle clock + skew. */ -long +double +wtimer_elapsed (struct wget_timer *wt) +{ + wget_sys_time now; + double elapsed; + + wtimer_sys_set (&now); + elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start); + + /* Ideally we'd just return the difference between NOW and + wt->start. However, the system timer can be set back, and we + could return a value smaller than when we were last called, even + a negative value. 
Both of these would confuse the callers, which + expect us to return monotonically nondecreasing values. + + Therefore: if ELAPSED is smaller than its previous known value, + we reset wt->start to the current time and effectively start + measuring from this point. But since we don't want the elapsed + value to start from zero, we set elapsed_pre_start to the last + elapsed time and increment all future calculations by that + amount. */ + + if (elapsed < wt->elapsed_last) + { + wt->start = now; + wt->elapsed_pre_start = wt->elapsed_last; + elapsed = wt->elapsed_last; + } + + wt->elapsed_last = elapsed; + return elapsed; +} + +/* Return the assessed granularity of the timer implementation, in + milliseconds. This is used by code that tries to substitute a + better value for timers that have returned zero. */ + +double wtimer_granularity (void) { #ifdef TIMER_GETTIMEOFDAY - /* Granularity of gettimeofday is hugely architecture-dependent. - However, it appears that on modern machines it is better than - 1ms. */ - return 1; + /* Granularity of gettimeofday varies wildly between architectures. + However, it appears that on modern machines it tends to be better + than 1ms. Assume 100 usecs. (Perhaps the configure process + could actually measure this?) */ + return 0.1; #endif #ifdef TIMER_TIME @@ -1669,7 +1711,8 @@ wtimer_granularity (void) #endif #ifdef TIMER_WINDOWS - /* ? */ + /* According to MSDN, GetSystemTime returns a broken-down time + structure the smallest member of which are milliseconds. */ return 1; #endif } @@ -1774,6 +1817,49 @@ determine_screen_width (void) #endif /* TIOCGWINSZ */ } +/* Return a random number between 0 and MAX-1, inclusive. + + If MAX is greater than the value of RAND_MAX+1 on the system, the + returned value will be in the range [0, RAND_MAX]. This may be + fixed in a future release. + + The random number generator is seeded automatically the first time + it is called. + + This uses rand() for portability. 
It has been suggested that + random() offers better randomness, but this is not required for + Wget, so I chose to go for simplicity and use rand + unconditionally. */ + +int +random_number (int max) +{ + static int seeded; + double bounded; + int rnd; + + if (!seeded) + { + srand (time (NULL)); + seeded = 1; + } + rnd = rand (); + + /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1, + and enforce that assumption by masking other bits. */ +#ifndef RAND_MAX +# define RAND_MAX 32767 + rnd &= RAND_MAX; +#endif + + /* This is equivalent to rand() % max, but uses the high-order bits + for better randomness on architecture where rand() is implemented + using a simple congruential generator. */ + + bounded = (double)max * rnd / (RAND_MAX + 1.0); + return (int)bounded; +} + #if 0 /* A debugging function for checking whether an MD5 library works. */ @@ -1807,96 +1893,75 @@ debug_test_md5 (char *buf) return res; } #endif + +/* Implementation of run_with_timeout, a generic timeout handler for + systems with Unix-like signal handling. */ +#ifdef USE_SIGNAL_TIMEOUT +# ifdef HAVE_SIGSETJMP +# define SETJMP(env) sigsetjmp (env, 1) -#if 0 -/* Debugging and testing support for path_simplify. */ +static sigjmp_buf run_with_timeout_env; -/* Debug: run path_simplify on PATH and return the result in a new - string. Useful for calling from the debugger. 
*/ -static char * -ps (char *path) +static RETSIGTYPE +abort_run_with_timeout (int sig) { - char *copy = xstrdup (path); - path_simplify (copy); - return copy; + assert (sig == SIGALRM); + siglongjmp (run_with_timeout_env, -1); } +# else /* not HAVE_SIGSETJMP */ +# define SETJMP(env) setjmp (env) -static void -run_test (char *test, char *expected_result, int expected_change) +static jmp_buf run_with_timeout_env; + +static RETSIGTYPE +abort_run_with_timeout (int sig) { - char *test_copy = xstrdup (test); - int modified = path_simplify (test_copy); + assert (sig == SIGALRM); + /* We don't have siglongjmp to preserve the set of blocked signals; + if we longjumped out of the handler at this point, SIGALRM would + remain blocked. We must unblock it manually. */ + int mask = siggetmask (); + mask &= ~sigmask(SIGALRM); + sigsetmask (mask); - if (0 != strcmp (test_copy, expected_result)) - { - printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n", - test, expected_result, test_copy); - } - if (modified != expected_change) - { - if (expected_change == 1) - printf ("Expected no modification with path_simplify(\"%s\").\n", - test); - else - printf ("Expected modification with path_simplify(\"%s\").\n", - test); - } - xfree (test_copy); + /* Now it's safe to longjump. 
*/ + longjmp (run_with_timeout_env, -1); } +# endif /* not HAVE_SIGSETJMP */ +#endif /* USE_SIGNAL_TIMEOUT */ -static void -test_path_simplify (void) -{ - static struct { - char *test, *result; - int should_modify; - } tests[] = { - { "", "", 0 }, - { ".", "", 1 }, - { "..", "", 1 }, - { "foo", "foo", 0 }, - { "foo/bar", "foo/bar", 0 }, - { "foo///bar", "foo/bar", 1 }, - { "foo/.", "foo/", 1 }, - { "foo/./", "foo/", 1 }, - { "foo./", "foo./", 0 }, - { "foo/../bar", "bar", 1 }, - { "foo/../bar/", "bar/", 1 }, - { "foo/bar/..", "foo/", 1 }, - { "foo/bar/../x", "foo/x", 1 }, - { "foo/bar/../x/", "foo/x/", 1 }, - { "foo/..", "", 1 }, - { "foo/../..", "", 1 }, - { "a/b/../../c", "c", 1 }, - { "./a/../b", "b", 1 } - }; - int i; +int +run_with_timeout (long timeout, void (*fun) (void *), void *arg) +{ +#ifndef USE_SIGNAL_TIMEOUT + fun (arg); + return 0; +#else + int saved_errno; - for (i = 0; i < ARRAY_SIZE (tests); i++) + if (timeout == 0) { - char *test = tests[i].test; - char *expected_result = tests[i].result; - int expected_change = tests[i].should_modify; - run_test (test, expected_result, expected_change); + fun (arg); + return 0; } - /* Now run all the tests with a leading slash before the test case, - to prove that the slash is being preserved. */ - for (i = 0; i < ARRAY_SIZE (tests); i++) + signal (SIGALRM, abort_run_with_timeout); + if (SETJMP (run_with_timeout_env) != 0) { - char *test, *expected_result; - int expected_change = tests[i].should_modify; - - test = xmalloc (1 + strlen (tests[i].test) + 1); - sprintf (test, "/%s", tests[i].test); - - expected_result = xmalloc (1 + strlen (tests[i].result) + 1); - sprintf (expected_result, "/%s", tests[i].result); + /* Longjumped out of FUN with a timeout. */ + signal (SIGALRM, SIG_DFL); + return 1; + } + alarm (timeout); + fun (arg); - run_test (test, expected_result, expected_change); + /* Preserve errno in case alarm() or signal() modifies it. 
*/ + saved_errno = errno; + alarm (0); + signal (SIGALRM, SIG_DFL); + errno = saved_errno; - xfree (test); - xfree (expected_result); - } -} + return 0; #endif +} +