X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=cd1e645d42e7968ec4eb5d8dfdf5d9400b818e5a;hb=f3d3a50a5697957befaf94cb44797234a68068c3;hp=08e8bbba736681bd489e7decbe6a6ede309a8c6a;hpb=f5799945b0f8e249bcc94e5a8c34d785e64c88c7;p=wget diff --git a/src/utils.c b/src/utils.c index 08e8bbba..cd1e645d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -50,9 +50,35 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #endif #include #include + +/* For TIOCGWINSZ and friends: */ #ifdef HAVE_SYS_IOCTL_H # include #endif +#ifdef HAVE_TERMIOS_H +# include +#endif + +/* Needed for run_with_timeout. */ +#undef USE_SIGNAL_TIMEOUT +#ifdef HAVE_SIGNAL_H +# include +#endif +#ifdef HAVE_SETJMP_H +# include +#endif +/* If sigsetjmp is a macro, configure won't pick it up. */ +#ifdef sigsetjmp +# define HAVE_SIGSETJMP +#endif +#ifdef HAVE_SIGNAL +# ifdef HAVE_SIGSETJMP +# define USE_SIGNAL_TIMEOUT +# endif +# ifdef HAVE_SIGBLOCK +# define USE_SIGNAL_TIMEOUT +# endif +#endif #include "wget.h" #include "utils.h" @@ -79,21 +105,9 @@ extern int errno; static void memfatal (const char *what) { - /* HACK: expose save_log_p from log.c, so we can turn it off in - order to prevent saving the log. Saving the log is dangerous - because logprintf() and logputs() can call malloc(), so this - could infloop. When logging is turned off, infloop can no longer - happen. - - #### This is no longer really necessary because the new routines - in log.c cons only if the line exceeds eighty characters. But - this can come at the end of a line, so it's OK to be careful. - - On a more serious note, it would be good to have a - log_forced_shutdown() routine that exposes this cleanly. */ - extern int save_log_p; - - save_log_p = 0; + /* Make sure we don't try to store part of the log line, and thus + call malloc. 
*/ + log_set_save_context (0); logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what); exit (1); } @@ -459,147 +473,19 @@ fork_to_background (void) else if (pid != 0) { /* parent, no error */ - printf (_("Continuing in background.\n")); + printf (_("Continuing in background, pid %d.\n"), (int)pid); if (changedp) printf (_("Output will be written to `%s'.\n"), opt.lfilename); - exit (0); - } - /* child: keep running */ -} -#endif /* not WINDOWS */ - -#if 0 -/* debug */ -char * -ps (char *orig) -{ - char *r = xstrdup (orig); - path_simplify (r); - return r; -} -#endif - -/* Canonicalize PATH, and return a new path. The new path differs from PATH - in that: - Multple `/'s are collapsed to a single `/'. - Leading `./'s and trailing `/.'s are removed. - Trailing `/'s are removed. - Non-leading `../'s and trailing `..'s are handled by removing - portions of the path. - - E.g. "a/b/c/./../d/.." will yield "a/b". This function originates - from GNU Bash. - - Changes for Wget: - Always use '/' as stub_char. - Don't check for local things using canon_stat. - Change the original string instead of strdup-ing. - React correctly when beginning with `./' and `../'. - Don't zip out trailing slashes. */ -int -path_simplify (char *path) -{ - register int i, start; - int changes = 0; - char stub_char; - - if (!*path) - return 0; - - stub_char = '/'; - - if (path[0] == '/') - /* Preserve initial '/'. */ - ++path; - - /* Nix out leading `.' or `..' with. */ - if ((path[0] == '.' && path[1] == '\0') - || (path[0] == '.' && path[1] == '.' && path[2] == '\0')) - { - path[0] = '\0'; - changes = 1; - return changes; - } - - /* Walk along PATH looking for things to compact. */ - i = 0; - while (1) - { - if (!path[i]) - break; - - while (path[i] && path[i] != '/') - i++; - - start = i++; - - /* If we didn't find any slashes, then there is nothing left to do. */ - if (!path[start]) - break; - - /* Handle multiple `/'s in a row. 
*/ - while (path[i] == '/') - i++; - - if ((start + 1) != i) - { - strcpy (path + start + 1, path + i); - i = start + 1; - changes = 1; - } - - /* Check for `../', `./' or trailing `.' by itself. */ - if (path[i] == '.') - { - /* Handle trailing `.' by itself. */ - if (!path[i + 1]) - { - path[--i] = '\0'; - changes = 1; - break; - } - - /* Handle `./'. */ - if (path[i + 1] == '/') - { - strcpy (path + i, path + i + 1); - i = (start < 0) ? 0 : start; - changes = 1; - continue; - } - - /* Handle `../' or trailing `..' by itself. */ - if (path[i + 1] == '.' && - (path[i + 2] == '/' || !path[i + 2])) - { - while (--start > -1 && path[start] != '/'); - strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2])); - i = (start < 0) ? 0 : start; - changes = 1; - continue; - } - } /* path == '.' */ - } /* while */ - - /* Addition: Remove all `./'-s and `../'-s preceding the string. */ - i = 0; - while (1) - { - if (path[i] == '.' && path[i + 1] == '/') - i += 2; - else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/') - i += 3; - else - break; - } - if (i) - { - strcpy (path, path + i - 0); - changes = 1; + exit (0); /* #### should we use _exit()? */ } - return changes; + /* child: give up the privileges and keep running. */ + setsid (); + freopen ("/dev/null", "r", stdin); + freopen ("/dev/null", "w", stdout); + freopen ("/dev/null", "w", stderr); } +#endif /* not WINDOWS */ /* "Touch" FILE, i.e. make its atime and mtime equal to the time specified with TM. */ @@ -714,6 +600,7 @@ make_directory (const char *directory) { int quit = 0; int i; + int ret = 0; char *dir; /* Make a copy of dir, to be able to write to it. Otherwise, the @@ -729,18 +616,19 @@ make_directory (const char *directory) if (!dir[i]) quit = 1; dir[i] = '\0'; - /* Check whether the directory already exists. */ + /* Check whether the directory already exists. Allow creation of + intermediate directories to fail, as the initial path components + are not necessarily directories! 
*/ + if (!file_exists_p (dir)) - { - if (mkdir (dir, 0777) < 0) - return -1; - } + ret = mkdir (dir, 0777); + else + ret = 0; if (quit) break; else dir[i] = '/'; } - return 0; + return ret; } /* Merge BASE with FILE. BASE can be a directory or a file name, FILE @@ -925,11 +813,38 @@ suffix (const char *str) return NULL; } -/* Read a line from FP. The function reallocs the storage as needed - to accomodate for any length of the line. Reallocs are done - storage exponentially, doubling the storage after each overflow to - minimize the number of calls to realloc() and fgets(). The newline - character at the end of line is retained. +/* Return non-zero if FNAME ends with a typical HTML suffix. The + following (case-insensitive) suffixes are presumed to be HTML files: + + html + htm + ?html (`?' matches one character) + + #### CAVEAT. This is not necessarily a good indication that FNAME + refers to a file that contains HTML! */ +int +has_html_suffix_p (const char *fname) +{ + char *suf; + + if ((suf = suffix (fname)) == NULL) + return 0; + if (!strcasecmp (suf, "html")) + return 1; + if (!strcasecmp (suf, "htm")) + return 1; + if (suf[0] && !strcasecmp (suf + 1, "html")) + return 1; + return 0; +} + +/* Read a line from FP and return the pointer to freshly allocated + storage. The storage space is obtained through malloc() and + should be freed with free() when it is no longer needed. + + The length of the line is not limited, except by available memory. + The newline character at the end of line is retained. The line is + terminated with a zero character. After end-of-file is encountered without anything being read, NULL is returned. NULL is also returned on error. 
To distinguish @@ -939,15 +854,20 @@ char * read_whole_line (FILE *fp) { int length = 0; - int bufsize = 81; + int bufsize = 82; char *line = (char *)xmalloc (bufsize); while (fgets (line + length, bufsize - length, fp)) { length += strlen (line + length); - assert (length > 0); + if (length == 0) + /* Possible for example when reading from a binary file where + a line begins with \0. */ + continue; + if (line[length - 1] == '\n') break; + /* fgets() guarantees to read the whole line, or to use up the space we've given it. We can double the buffer unconditionally. */ @@ -1342,7 +1262,7 @@ legible (long l) { char inbuf[24]; /* Print the number into the buffer. */ - long_to_string (inbuf, l); + number_to_string (inbuf, l); return legible_1 (inbuf); } @@ -1398,17 +1318,17 @@ legible_very_long (VERY_LONG_TYPE l) /* Count the digits in a (long) integer. */ int -numdigit (long a) +numdigit (long number) { - int res = 1; - if (a < 0) + int cnt = 1; + if (number < 0) { - a = -a; - ++res; + number = -number; + ++cnt; } - while ((a /= 10) != 0) - ++res; - return res; + while ((number /= 10) > 0) + ++cnt; + return cnt; } #define ONE_DIGIT(figure) *p++ = n / (figure) + '0' @@ -1437,21 +1357,26 @@ numdigit (long a) #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10) #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10) -/* Print NUMBER to BUFFER in base 10. This is completely equivalent - to `sprintf(buffer, "%ld", number)', only much faster. +/* Print NUMBER to BUFFER in base 10. This should be completely + equivalent to `sprintf(buffer, "%ld", number)', only much faster. The speedup may make a difference in programs that frequently convert numbers to strings. Some implementations of sprintf, particularly the one in GNU libc, have been known to be extremely slow compared to this function. - BUFFER should accept as many bytes as you expect the number to take - up. 
On machines with 64-bit longs the maximum needed size is 24 - bytes. That includes the worst-case digits, the optional `-' sign, - and the trailing \0. */ + Return the pointer to the location where the terminating zero was + printed. (Equivalent to calling buffer+strlen(buffer) after the + function is done.) -void -long_to_string (char *buffer, long number) + BUFFER should be big enough to accept as many bytes as you expect + the number to take up. On machines with 64-bit longs the maximum + needed size is 24 bytes. That includes the digits needed for the + largest 64-bit number, the `-' sign in case it's negative, and the + terminating '\0'. */ + +char * +number_to_string (char *buffer, long number) { char *p = buffer; long n = number; @@ -1460,6 +1385,7 @@ long_to_string (char *buffer, long number) /* We are running in a strange or misconfigured environment. Let sprintf cope with it. */ sprintf (buffer, "%ld", n); + p += strlen (buffer); #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */ if (n < 0) @@ -1495,6 +1421,8 @@ long_to_string (char *buffer, long number) *p = '\0'; #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */ + + return p; } #undef ONE_DIGIT @@ -1774,7 +1702,50 @@ determine_screen_width (void) #endif /* TIOCGWINSZ */ } -#if 1 +/* Return a random number between 0 and MAX-1, inclusive. + + If MAX is greater than the value of RAND_MAX+1 on the system, the + returned value will be in the range [0, RAND_MAX]. This may be + fixed in a future release. + + The random number generator is seeded automatically the first time + it is called. + + This uses rand() for portability. It has been suggested that + random() offers better randomness, but this is not required for + Wget, so I chose to go for simplicity and use rand + unconditionally. 
*/ + +int +random_number (int max) +{ + static int seeded; + double bounded; + int rnd; + + if (!seeded) + { + srand (time (NULL)); + seeded = 1; + } + rnd = rand (); + + /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1, + and enforce that assumption by masking other bits. */ +#ifndef RAND_MAX +# define RAND_MAX 32767 + rnd &= RAND_MAX; +#endif + + /* This yields a value in the same range as rand() % max, but uses the high-order bits + for better randomness on architectures where rand() is implemented + using a simple congruential generator. */ + + bounded = (double)max * rnd / (RAND_MAX + 1.0); + return (int)bounded; +} + +#if 0 /* A debugging function for checking whether an MD5 library works. */ #include "gen-md5.h" @@ -1807,3 +1778,73 @@ debug_test_md5 (char *buf) return res; } #endif + +/* Implementation of run_with_timeout, a generic timeout handler for + systems with Unix-like signal handling. */ +#ifdef HAVE_SIGSETJMP +#define SETJMP(env) sigsetjmp (env, 1) + +static sigjmp_buf run_with_timeout_env; + +static RETSIGTYPE +abort_run_with_timeout (int sig) +{ + assert (sig == SIGALRM); + siglongjmp (run_with_timeout_env, -1); +} +#else /* not HAVE_SIGSETJMP */ +#define SETJMP(env) setjmp (env) + +static jmp_buf run_with_timeout_env; + +static RETSIGTYPE +abort_run_with_timeout (int sig) +{ + assert (sig == SIGALRM); + /* We don't have siglongjmp to preserve the set of blocked signals; + if we longjumped out of the handler at this point, SIGALRM would + remain blocked. We must unblock it manually. */ + int mask = siggetmask (); + mask &= ~sigmask(SIGALRM); + sigsetmask (mask); + + /* Now it's safe to longjump. 
*/ + longjmp (run_with_timeout_env, -1); +} +#endif /* not HAVE_SIGSETJMP */ + +int +run_with_timeout (long timeout, void (*fun) (void *), void *arg) +{ +#ifndef USE_SIGNAL_TIMEOUT + fun (arg); + return 0; +#else + int saved_errno; + + if (timeout == 0) + { + fun (arg); + return 0; + } + + signal (SIGALRM, abort_run_with_timeout); + if (SETJMP (run_with_timeout_env) != 0) + { + /* Longjumped out of FUN with a timeout. */ + signal (SIGALRM, SIG_DFL); + return 1; + } + alarm (timeout); + fun (arg); + + /* Preserve errno in case alarm() or signal() modifies it. */ + saved_errno = errno; + alarm (0); + signal (SIGALRM, SIG_DFL); + errno = saved_errno; + + return 0; +#endif +} +