# include <termios.h>
#endif
+/* Needed for run_with_timeout. */
+#undef USE_SIGNAL_TIMEOUT
+#ifdef HAVE_SIGNAL_H
+# include <signal.h>
+#endif
+#ifdef HAVE_SETJMP_H
+# include <setjmp.h>
+#endif
+/* If sigsetjmp is a macro, configure won't pick it up. */
+#ifdef sigsetjmp
+# define HAVE_SIGSETJMP
+#endif
+#ifdef HAVE_SIGNAL
+# ifdef HAVE_SIGSETJMP
+# define USE_SIGNAL_TIMEOUT
+# endif
+# ifdef HAVE_SIGBLOCK
+# define USE_SIGNAL_TIMEOUT
+# endif
+#endif
+
#include "wget.h"
#include "utils.h"
#include "fnmatch.h"
+/* Print a "Not enough memory" message naming WHAT through logprintf
+ and terminate the program with exit status 1. */
static void
memfatal (const char *what)
{
- /* HACK: expose save_log_p from log.c, so we can turn it off in
- order to prevent saving the log. Saving the log is dangerous
- because logprintf() and logputs() can call malloc(), so this
- could infloop. When logging is turned off, infloop can no longer
- happen.
-
- #### This is no longer really necessary because the new routines
- in log.c cons only if the line exceeds eighty characters. But
- this can come at the end of a line, so it's OK to be careful.
-
- On a more serious note, it would be good to have a
- log_forced_shutdown() routine that exposes this cleanly. */
- extern int save_log_p;
-
- save_log_p = 0;
+ /* Make sure we don't try to store part of the log line, and thus
+ call malloc. We are here because memory ran out, so the message
+ below has to be emitted without triggering another allocation. */
+ log_set_save_context (0);
logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
exit (1);
}
else if (pid != 0)
{
/* parent, no error */
- printf (_("Continuing in background.\n"));
+ printf (_("Continuing in background, pid %d.\n"), (int)pid);
if (changedp)
printf (_("Output will be written to `%s'.\n"), opt.lfilename);
- exit (0);
- }
- /* child: keep running */
-}
-#endif /* not WINDOWS */
-\f
-/* Resolve "." and ".." elements of PATH by destructively modifying
- PATH. "." is resolved by removing that path element, and ".." is
- resolved by removing the preceding path element. Leading and
- trailing slashes are preserved.
-
- Return non-zero if any changes have been made.
-
- For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive
- test examples are provided below. If you change anything in this
- function, run test_path_simplify to make sure you haven't broken a
- test case.
-
- A previous version of this function was based on path_simplify()
- from GNU Bash, but it has been rewritten for Wget 1.8.1. */
-
-int
-path_simplify (char *path)
-{
- int change = 0;
- char *p, *end;
-
- if (path[0] == '/')
- ++path; /* preserve the leading '/'. */
-
- p = path;
- end = p + strlen (p) + 1; /* position past the terminating zero. */
-
- while (1)
- {
- again:
- /* P should point to the beginning of a path element. */
-
- if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0'))
- {
- /* Handle "./foo" by moving "foo" two characters to the
- left. */
- if (*(p + 1) == '/')
- {
- change = 1;
- memmove (p, p + 2, end - p);
- end -= 2;
- goto again;
- }
- else
- {
- change = 1;
- *p = '\0';
- break;
- }
- }
- else if (*p == '.' && *(p + 1) == '.'
- && (*(p + 2) == '/' || *(p + 2) == '\0'))
- {
- /* Handle "../foo" by moving "foo" one path element to the
- left. */
- char *b = p; /* not p-1 because P can equal PATH */
-
- /* Backtrack by one path element, but not past the beginning
- of PATH. */
-
- /* foo/bar/../baz */
- /* ^ p */
- /* ^ b */
-
- if (b > path)
- {
- /* Move backwards until B hits the beginning of the
- previous path element or the beginning of path. */
- for (--b; b > path && *(b - 1) != '/'; b--)
- ;
- }
-
- change = 1;
- if (*(p + 2) == '/')
- {
- memmove (b, p + 3, end - (p + 3));
- end -= (p + 3) - b;
- p = b;
- }
- else
- {
- *b = '\0';
- break;
- }
-
- goto again;
- }
- else if (*p == '/')
- {
- /* Remove empty path elements. Not mandated by rfc1808 et
- al, but empty path elements are not all that useful, and
- the rest of Wget might not deal with them well. */
- char *q = p;
- while (*q == '/')
- ++q;
- change = 1;
- if (*q == '\0')
- {
- *p = '\0';
- break;
- }
- memmove (p, q, end - q);
- end -= q - p;
- goto again;
- }
-
- /* Skip to the next path element. */
- while (*p && *p != '/')
- ++p;
- if (*p == '\0')
- break;
-
- /* Make sure P points to the beginning of the next path element,
- which is location after the slash. */
- ++p;
+ exit (0); /* #### should we use _exit()? */
}
- return change;
+ /* child: give up the privileges and keep running. */
+ setsid ();
+ freopen ("/dev/null", "r", stdin);
+ freopen ("/dev/null", "w", stdout);
+ freopen ("/dev/null", "w", stderr);
}
+#endif /* not WINDOWS */
\f
/* "Touch" FILE, i.e. make its atime and mtime equal to the time
specified with TM. */
return S_ISDIR (buf.st_mode) ? 0 : 1;
}
+/* Return the size of file named by FILENAME, or -1 if it cannot be
+   opened or seeked into.
+
+   The file is opened (and closed again) rather than stat'ed, so a
+   non-negative result also means the file is readable.  */
+long
+file_size (const char *filename)
+{
+  long size;
+  /* We use fseek rather than stat to determine the file size because
+     that way we can also verify whether the file is readable.
+     Inspired by the POST patch by Arnaud Wylie.  */
+  FILE *fp = fopen (filename, "rb");
+  if (!fp)
+    return -1;
+  /* A failed seek must not be reported as the current offset, so map
+     it to -1 explicitly.  ftell itself yields -1 on failure.  */
+  if (fseek (fp, 0, SEEK_END) != 0)
+    size = -1;
+  else
+    size = ftell (fp);
+  fclose (fp);
+  return size;
+}
+
/* Return a unique filename, given a prefix and count */
static char *
unique_name_1 (const char *fileprefix, int count)
{
int quit = 0;
int i;
+ int ret = 0;
char *dir;
/* Make a copy of dir, to be able to write to it. Otherwise, the
if (!dir[i])
quit = 1;
dir[i] = '\0';
- /* Check whether the directory already exists. */
+ /* Check whether the directory already exists. Allow creation of
+ intermediate directories to fail, as the initial path components
+ are not necessarily directories! */
if (!file_exists_p (dir))
- {
- if (mkdir (dir, 0777) < 0)
- return -1;
- }
+ ret = mkdir (dir, 0777);
+ else
+ ret = 0;
if (quit)
break;
else
dir[i] = '/';
}
- return 0;
+ return ret;
}
/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
return 1;
}
-/* Match the end of STRING against PATTERN. For instance:
+/* Return non-zero if STRING ends with TAIL. For instance:
+
+ match_tail ("abc", "bc", 0) -> 1
+ match_tail ("abc", "ab", 0) -> 0
+ match_tail ("abc", "abc", 0) -> 1
+
+ If FOLD_CASE_P is non-zero, the comparison will be
+ case-insensitive. */
- match_backwards ("abc", "bc") -> 1
- match_backwards ("abc", "ab") -> 0
- match_backwards ("abc", "abc") -> 1 */
int
-match_tail (const char *string, const char *pattern)
+match_tail (const char *string, const char *tail, int fold_case_p)
{
int i, j;
- for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
- if (string[i] != pattern[j])
- break;
- /* If the pattern was exhausted, the match was succesful. */
+ /* We want this to be fast, so we code two loops, one with
+ case-folding, one without. */
+
+ if (!fold_case_p)
+ {
+ for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
+ if (string[i] != tail[j])
+ break;
+ }
+ else
+ {
+ for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
+ if (TOLOWER (string[i]) != TOLOWER (tail[j]))
+ break;
+ }
+
+ /* If the tail was exhausted, the match was successful. */
if (j == -1)
return 1;
else
{
if (backward)
{
- if (match_tail (s, *accepts))
+ if (match_tail (s, *accepts, 0))
return 1;
}
else
return NULL;
}
+/* Return 1 if the suffix of FNAME, as reported by suffix(), looks
+   like an HTML one, and 0 otherwise.  The comparison ignores case
+   and accepts:
+
+     html
+     htm
+     ?html  (`?' stands for exactly one arbitrary character)
+
+   #### CAVEAT.  A matching suffix does not guarantee that the file
+   actually contains HTML!  */
+int
+has_html_suffix_p (const char *fname)
+{
+  char *ext = suffix (fname);
+
+  /* Without a suffix there is nothing to compare.  */
+  if (ext == NULL)
+    return 0;
+
+  /* The two plain spellings.  */
+  if (strcasecmp (ext, "html") == 0 || strcasecmp (ext, "htm") == 0)
+    return 1;
+
+  /* One leading character followed by "html".  The ext[0] test keeps
+     us from stepping past the terminator of an empty suffix.  */
+  if (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0)
+    return 1;
+
+  return 0;
+}
+
/* Read a line from FP and return the pointer to freshly allocated
storage. The stoarage space is obtained through malloc() and
should be freed with free() when it is no longer needed.
return cnt;
}
+/* A fallback definition of INT_MAX for machines that don't bother
+ to define one. The expression sets only the bit at position
+ 8 * sizeof (int) - 1 of an unsigned value (so 8-bit bytes are
+ assumed), complements it, and casts the result to int. */
+#ifndef INT_MAX
+# define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
+#endif
+
#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
if (n < 0)
{
+ if (n < -INT_MAX)
+ {
+ /* We cannot print a '-' and assign -n to n because -n would
+ overflow. Let sprintf deal with this border case. */
+ sprintf (buffer, "%ld", n);
+ p += strlen (buffer);
+ return p;
+ }
+
*p++ = '-';
n = -n;
}
#endif /* TIOCGWINSZ */
}
+/* Return a pseudo-random integer in the range [0, MAX-1].
+
+   The value is obtained by scaling one rand() result, so at most
+   RAND_MAX+1 distinct results are possible.  When MAX is larger than
+   that, some values in the range can never be returned.
+
+   The generator is seeded from the current time on the first call.
+
+   rand() is used unconditionally for portability.  random() may
+   offer better randomness, but Wget does not need it.  */
+
+int
+random_number (int max)
+{
+  static int initialized = 0;
+  double scaled;
+  int raw;
+
+  /* One-time seeding.  */
+  if (initialized == 0)
+    {
+      initialized = 1;
+      srand (time (NULL));
+    }
+
+  raw = rand ();
+
+  /* Where RAND_MAX is missing, assume 2**15 - 1 and mask RAW so that
+     the assumption actually holds.  */
+#ifndef RAND_MAX
+# define RAND_MAX 32767
+  raw &= RAND_MAX;
+#endif
+
+  /* Scale instead of taking rand() % max.  This relies on the
+     high-order bits, which are the better ones when rand() is a
+     simple congruential generator.  */
+  scaled = (double)max * raw / (RAND_MAX + 1.0);
+  return (int)scaled;
+}
+
#if 0
/* A debugging function for checking whether an MD5 library works. */
return res;
}
#endif
+\f
+/* Implementation of run_with_timeout, a generic timeout handler for
+ systems with Unix-like signal handling. */
+#ifdef USE_SIGNAL_TIMEOUT
+# ifdef HAVE_SIGSETJMP
+# define SETJMP(env) sigsetjmp (env, 1)
-#if 0
-/* Debugging and testing support for path_simplify. */
+static sigjmp_buf run_with_timeout_env;
-/* Debug: run path_simplify on PATH and return the result in a new
- string. Useful for calling from the debugger. */
-static char *
-ps (char *path)
+/* SIGALRM handler for the sigsetjmp variant. It jumps back to the
+ context saved in run_with_timeout_env, so the SETJMP call that
+ saved it returns -1. */
+static RETSIGTYPE
+abort_run_with_timeout (int sig)
{
- char *copy = xstrdup (path);
- path_simplify (copy);
- return copy;
+ assert (sig == SIGALRM);
+ siglongjmp (run_with_timeout_env, -1);
}
+# else /* not HAVE_SIGSETJMP */
+# define SETJMP(env) setjmp (env)
-static void
-run_test (char *test, char *expected_result, int expected_change)
+static jmp_buf run_with_timeout_env;
+
+/* SIGALRM handler for the plain setjmp variant. It removes SIGALRM
+ from the blocked-signal mask and then jumps back to the context
+ saved in run_with_timeout_env, so the SETJMP call that saved it
+ returns -1. */
+static RETSIGTYPE
+abort_run_with_timeout (int sig)
{
- char *test_copy = xstrdup (test);
- int modified = path_simplify (test_copy);
+ assert (sig == SIGALRM);
+ /* We don't have siglongjmp to preserve the set of blocked signals;
+ if we longjumped out of the handler at this point, SIGALRM would
+ remain blocked. We must unblock it manually.
+
+ NOTE(review): the declaration of `mask' below follows a statement
+ (the assert), which pre-C99 compilers reject -- confirm which
+ compilers have to be supported. */
+ int mask = siggetmask ();
+ mask &= ~sigmask(SIGALRM);
+ sigsetmask (mask);
- if (0 != strcmp (test_copy, expected_result))
- {
- printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
- test, expected_result, test_copy);
- }
- if (modified != expected_change)
- {
- if (expected_change == 1)
- printf ("Expected no modification with path_simplify(\"%s\").\n",
- test);
- else
- printf ("Expected modification with path_simplify(\"%s\").\n",
- test);
- }
- xfree (test_copy);
+ /* Now it's safe to longjump. */
+ longjmp (run_with_timeout_env, -1);
}
+# endif /* not HAVE_SIGSETJMP */
+#endif /* USE_SIGNAL_TIMEOUT */
-static void
-test_path_simplify (void)
-{
- static struct {
- char *test, *result;
- int should_modify;
- } tests[] = {
- { "", "", 0 },
- { ".", "", 1 },
- { "..", "", 1 },
- { "foo", "foo", 0 },
- { "foo/bar", "foo/bar", 0 },
- { "foo///bar", "foo/bar", 1 },
- { "foo/.", "foo/", 1 },
- { "foo/./", "foo/", 1 },
- { "foo./", "foo./", 0 },
- { "foo/../bar", "bar", 1 },
- { "foo/../bar/", "bar/", 1 },
- { "foo/bar/..", "foo/", 1 },
- { "foo/bar/../x", "foo/x", 1 },
- { "foo/bar/../x/", "foo/x/", 1 },
- { "foo/..", "", 1 },
- { "foo/../..", "", 1 },
- { "a/b/../../c", "c", 1 },
- { "./a/../b", "b", 1 }
- };
- int i;
+/* Call FUN (ARG), abandoning the call if it has not returned after
+ TIMEOUT seconds (the value is handed to alarm()).
+
+ Return 0 if FUN returned normally, or 1 if the SIGALRM handler
+ jumped out of it because the timeout expired. When TIMEOUT is 0,
+ or when USE_SIGNAL_TIMEOUT is not defined, FUN is simply called
+ without any time limit and 0 is returned.
+
+ In the timed case the SIGALRM disposition is set to SIG_DFL on the
+ way out, so a handler installed before the call is not restored.
+ errno as left by FUN is preserved across the cleanup. */
+int
+run_with_timeout (long timeout, void (*fun) (void *), void *arg)
+{
+#ifndef USE_SIGNAL_TIMEOUT
+ fun (arg);
+ return 0;
+#else
+ int saved_errno;
- for (i = 0; i < ARRAY_SIZE (tests); i++)
+ if (timeout == 0)
{
- char *test = tests[i].test;
- char *expected_result = tests[i].result;
- int expected_change = tests[i].should_modify;
- run_test (test, expected_result, expected_change);
+ fun (arg);
+ return 0;
}
- /* Now run all the tests with a leading slash before the test case,
- to prove that the slash is being preserved. */
- for (i = 0; i < ARRAY_SIZE (tests); i++)
+ signal (SIGALRM, abort_run_with_timeout);
+ if (SETJMP (run_with_timeout_env) != 0)
{
- char *test, *expected_result;
- int expected_change = tests[i].should_modify;
-
- test = xmalloc (1 + strlen (tests[i].test) + 1);
- sprintf (test, "/%s", tests[i].test);
-
- expected_result = xmalloc (1 + strlen (tests[i].result) + 1);
- sprintf (expected_result, "/%s", tests[i].result);
+ /* Longjumped out of FUN with a timeout. */
+ signal (SIGALRM, SIG_DFL);
+ return 1;
+ }
+ alarm (timeout);
+ fun (arg);
- run_test (test, expected_result, expected_change);
+ /* Preserve errno in case alarm() or signal() modifies it. */
+ saved_errno = errno;
+ alarm (0);
+ signal (SIGALRM, SIG_DFL);
+ errno = saved_errno;
- xfree (test);
- xfree (expected_result);
- }
-}
+ return 0;
#endif
+}
+