You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
# include <termios.h>
#endif
+/* Needed for run_with_timeout. */
+#undef USE_SIGNAL_TIMEOUT
+#ifdef HAVE_SIGNAL_H
+# include <signal.h>
+#endif
+#ifdef HAVE_SETJMP_H
+# include <setjmp.h>
+#endif
+/* If sigsetjmp is a macro, configure won't pick it up. */
+#ifdef sigsetjmp
+# define HAVE_SIGSETJMP
+#endif
+#ifdef HAVE_SIGNAL
+# ifdef HAVE_SIGSETJMP
+# define USE_SIGNAL_TIMEOUT
+# endif
+# ifdef HAVE_SIGBLOCK
+# define USE_SIGNAL_TIMEOUT
+# endif
+#endif
+
#include "wget.h"
#include "utils.h"
#include "fnmatch.h"
return S_ISDIR (buf.st_mode) ? 0 : 1;
}
+/* Return the size of file named by FILENAME, or -1 if it cannot be
+ opened or seeked into. */
+long
+file_size (const char *filename)
+{
+ long size;
+ /* We use fseek rather than stat to determine the file size because
+ that way we can also verify whether the file is readable.
+ Inspired by the POST patch by Arnaud Wylie. */
+ FILE *fp = fopen (filename, "rb");
+ fseek (fp, 0, SEEK_END);
+ size = ftell (fp);
+ fclose (fp);
+ return size;
+}
+
/* Return a unique filename, given a prefix and count */
static char *
unique_name_1 (const char *fileprefix, int count)
{
int quit = 0;
int i;
+ int ret = 0;
char *dir;
/* Make a copy of dir, to be able to write to it. Otherwise, the
if (!dir[i])
quit = 1;
dir[i] = '\0';
- /* Check whether the directory already exists. */
+ /* Check whether the directory already exists. Allow creation of
+ of intermediate directories to fail, as the initial path components
+ are not necessarily directories! */
if (!file_exists_p (dir))
- {
- if (mkdir (dir, 0777) < 0)
- return -1;
- }
+ ret = mkdir (dir, 0777);
+ else
+ ret = 0;
if (quit)
break;
else
dir[i] = '/';
}
- return 0;
+ return ret;
}
/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
return 1;
}
-/* Match the end of STRING against PATTERN. For instance:
+/* Return non-zero if STRING ends with TAIL. For instance:
+
+ match_tail ("abc", "bc", 0) -> 1
+ match_tail ("abc", "ab", 0) -> 0
+ match_tail ("abc", "abc", 0) -> 1
+
+ If FOLD_CASE_P is non-zero, the comparison will be
+ case-insensitive. */
- match_backwards ("abc", "bc") -> 1
- match_backwards ("abc", "ab") -> 0
- match_backwards ("abc", "abc") -> 1 */
int
-match_tail (const char *string, const char *pattern)
+match_tail (const char *string, const char *tail, int fold_case_p)
{
int i, j;
- for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
- if (string[i] != pattern[j])
- break;
- /* If the pattern was exhausted, the match was succesful. */
+ /* We want this to be fast, so we code two loops, one with
+ case-folding, one without. */
+
+ if (!fold_case_p)
+ {
+ for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
+ if (string[i] != tail[j])
+ break;
+ }
+ else
+ {
+ for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
+ if (TOLOWER (string[i]) != TOLOWER (tail[j]))
+ break;
+ }
+
+ /* If the tail was exhausted, the match was succesful. */
if (j == -1)
return 1;
else
{
if (backward)
{
- if (match_tail (s, *accepts))
+ if (match_tail (s, *accepts, 0))
return 1;
}
else
return NULL;
}
+/* Return non-zero if FNAME ends with a typical HTML suffix. The
+ following (case-insensitive) suffixes are presumed to be HTML files:
+
+ html
+ htm
+ ?html (`?' matches one character)
+
+ #### CAVEAT. This is not necessarily a good indication that FNAME
+ refers to a file that contains HTML! */
+int
+has_html_suffix_p (const char *fname)
+{
+ char *suf;
+
+ if ((suf = suffix (fname)) == NULL)
+ return 0;
+ if (!strcasecmp (suf, "html"))
+ return 1;
+ if (!strcasecmp (suf, "htm"))
+ return 1;
+ if (suf[0] && !strcasecmp (suf + 1, "html"))
+ return 1;
+ return 0;
+}
+
/* Read a line from FP and return the pointer to freshly allocated
storage. The stoarage space is obtained through malloc() and
should be freed with free() when it is no longer needed.
return cnt;
}
+/* A half-assed implementation of INT_MAX on machines that don't
+ bother to define one. */
+#ifndef INT_MAX
+# define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
+#endif
+
#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
if (n < 0)
{
+ if (n < -INT_MAX)
+ {
+ /* We cannot print a '-' and assign -n to n because -n would
+ overflow. Let sprintf deal with this border case. */
+ sprintf (buffer, "%ld", n);
+ p += strlen (buffer);
+ return p;
+ }
+
*p++ = '-';
n = -n;
}
# endif
#endif /* not WINDOWS */
-struct wget_timer {
#ifdef TIMER_GETTIMEOFDAY
- long secs;
- long usecs;
+typedef struct timeval wget_sys_time;
#endif
#ifdef TIMER_TIME
- time_t secs;
+typedef time_t wget_sys_time;
#endif
#ifdef TIMER_WINDOWS
- ULARGE_INTEGER wintime;
+typedef ULARGE_INTEGER wget_sys_time;
#endif
+
+struct wget_timer {
+ /* The starting point in time which, subtracted from the current
+ time, yields elapsed time. */
+ wget_sys_time start;
+
+ /* The most recent elapsed time, calculated by wtimer_elapsed().
+ Measured in milliseconds. */
+ double elapsed_last;
+
+ /* Approximately, the time elapsed between the true start of the
+ measurement and the time represented by START. */
+ double elapsed_pre_start;
};
/* Allocate a timer. It is not legal to do anything with a freshly
xfree (wt);
}
-/* Reset timer WT. This establishes the starting point from which
- wtimer_elapsed() will return the number of elapsed
- milliseconds. It is allowed to reset a previously used timer. */
+/* Store system time to WST. */
-void
-wtimer_reset (struct wget_timer *wt)
+static void
+wtimer_sys_set (wget_sys_time *wst)
{
#ifdef TIMER_GETTIMEOFDAY
- struct timeval t;
- gettimeofday (&t, NULL);
- wt->secs = t.tv_sec;
- wt->usecs = t.tv_usec;
+ gettimeofday (wst, NULL);
#endif
#ifdef TIMER_TIME
- wt->secs = time (NULL);
+ time (wst);
#endif
#ifdef TIMER_WINDOWS
+ /* We use GetSystemTime to get the elapsed time. MSDN warns that
+ system clock adjustments can skew the output of GetSystemTime
+ when used as a timer and gives preference to GetTickCount and
+ high-resolution timers. But GetTickCount can overflow, and hires
+ timers are typically used for profiling, not for regular time
+ measurement. Since we handle clock skew anyway, we just use
+ GetSystemTime. */
FILETIME ft;
SYSTEMTIME st;
GetSystemTime (&st);
+
+ /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
+ FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
+ arithmetic on that. */
SystemTimeToFileTime (&st, &ft);
- wt->wintime.HighPart = ft.dwHighDateTime;
- wt->wintime.LowPart = ft.dwLowDateTime;
+ wst->HighPart = ft.dwHighDateTime;
+ wst->LowPart = ft.dwLowDateTime;
#endif
}
-/* Return the number of milliseconds elapsed since the timer was last
- reset. It is allowed to call this function more than once to get
- increasingly higher elapsed values. */
+/* Reset timer WT. This establishes the starting point from which
+ wtimer_elapsed() will return the number of elapsed
+ milliseconds. It is allowed to reset a previously used timer. */
-long
-wtimer_elapsed (struct wget_timer *wt)
+void
+wtimer_reset (struct wget_timer *wt)
+{
+ /* Set the start time to the current time. */
+ wtimer_sys_set (&wt->start);
+ wt->elapsed_last = 0;
+ wt->elapsed_pre_start = 0;
+}
+
+static double
+wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
{
#ifdef TIMER_GETTIMEOFDAY
- struct timeval t;
- gettimeofday (&t, NULL);
- return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
+ return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
+ + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
#endif
#ifdef TIMER_TIME
- time_t now = time (NULL);
- return 1000 * (now - wt->secs);
+ return 1000 * (*wst1 - *wst2);
#endif
#ifdef WINDOWS
- FILETIME ft;
- SYSTEMTIME st;
- ULARGE_INTEGER uli;
- GetSystemTime (&st);
- SystemTimeToFileTime (&st, &ft);
- uli.HighPart = ft.dwHighDateTime;
- uli.LowPart = ft.dwLowDateTime;
- return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
+ return (double)(wst1->QuadPart - wst2->QuadPart) / 10000;
#endif
}
-/* Return the assessed granularity of the timer implementation. This
- is important for certain code that tries to deal with "zero" time
- intervals. */
+/* Return the number of milliseconds elapsed since the timer was last
+ reset. It is allowed to call this function more than once to get
+ increasingly higher elapsed values. These timers handle clock
+ skew. */
-long
+double
+wtimer_elapsed (struct wget_timer *wt)
+{
+ wget_sys_time now;
+ double elapsed;
+
+ wtimer_sys_set (&now);
+ elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
+
+ /* Ideally we'd just return the difference between NOW and
+ wt->start. However, the system timer can be set back, and we
+ could return a value smaller than when we were last called, even
+ a negative value. Both of these would confuse the callers, which
+ expect us to return monotonically nondecreasing values.
+
+ Therefore: if ELAPSED is smaller than its previous known value,
+ we reset wt->start to the current time and effectively start
+ measuring from this point. But since we don't want the elapsed
+ value to start from zero, we set elapsed_pre_start to the last
+ elapsed time and increment all future calculations by that
+ amount. */
+
+ if (elapsed < wt->elapsed_last)
+ {
+ wt->start = now;
+ wt->elapsed_pre_start = wt->elapsed_last;
+ elapsed = wt->elapsed_last;
+ }
+
+ wt->elapsed_last = elapsed;
+ return elapsed;
+}
+
+/* Return the assessed granularity of the timer implementation, in
+ milliseconds. This is used by code that tries to substitute a
+ better value for timers that have returned zero. */
+
+double
wtimer_granularity (void)
{
#ifdef TIMER_GETTIMEOFDAY
- /* Granularity of gettimeofday is hugely architecture-dependent.
- However, it appears that on modern machines it is better than
- 1ms. */
- return 1;
+ /* Granularity of gettimeofday varies wildly between architectures.
+ However, it appears that on modern machines it tends to be better
+ than 1ms. Assume 100 usecs. (Perhaps the configure process
+ could actually measure this?) */
+ return 0.1;
#endif
#ifdef TIMER_TIME
#endif
#ifdef TIMER_WINDOWS
- /* ? */
+ /* According to MSDN, GetSystemTime returns a broken-down time
+ structure the smallest member of which are milliseconds. */
return 1;
#endif
}
#endif /* TIOCGWINSZ */
}
+/* Return a random number between 0 and MAX-1, inclusive.
+
+ If MAX is greater than the value of RAND_MAX+1 on the system, the
+ returned value will be in the range [0, RAND_MAX]. This may be
+ fixed in a future release.
+
+ The random number generator is seeded automatically the first time
+ it is called.
+
+ This uses rand() for portability. It has been suggested that
+ random() offers better randomness, but this is not required for
+ Wget, so I chose to go for simplicity and use rand
+ unconditionally. */
+
+int
+random_number (int max)
+{
+ static int seeded;
+ double bounded;
+ int rnd;
+
+ if (!seeded)
+ {
+ srand (time (NULL));
+ seeded = 1;
+ }
+ rnd = rand ();
+
+ /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
+ and enforce that assumption by masking other bits. */
+#ifndef RAND_MAX
+# define RAND_MAX 32767
+ rnd &= RAND_MAX;
+#endif
+
+ /* This is equivalent to rand() % max, but uses the high-order bits
+ for better randomness on architecture where rand() is implemented
+ using a simple congruential generator. */
+
+ bounded = (double)max * rnd / (RAND_MAX + 1.0);
+ return (int)bounded;
+}
+
#if 0
/* A debugging function for checking whether an MD5 library works. */
return res;
}
#endif
+\f
+/* Implementation of run_with_timeout, a generic timeout handler for
+ systems with Unix-like signal handling. */
+#ifdef USE_SIGNAL_TIMEOUT
+# ifdef HAVE_SIGSETJMP
+# define SETJMP(env) sigsetjmp (env, 1)
+
+static sigjmp_buf run_with_timeout_env;
+
+static RETSIGTYPE
+abort_run_with_timeout (int sig)
+{
+ assert (sig == SIGALRM);
+ siglongjmp (run_with_timeout_env, -1);
+}
+# else /* not HAVE_SIGSETJMP */
+# define SETJMP(env) setjmp (env)
+
+static jmp_buf run_with_timeout_env;
+
+static RETSIGTYPE
+abort_run_with_timeout (int sig)
+{
+ assert (sig == SIGALRM);
+ /* We don't have siglongjmp to preserve the set of blocked signals;
+ if we longjumped out of the handler at this point, SIGALRM would
+ remain blocked. We must unblock it manually. */
+ int mask = siggetmask ();
+ mask &= ~sigmask(SIGALRM);
+ sigsetmask (mask);
+
+ /* Now it's safe to longjump. */
+ longjmp (run_with_timeout_env, -1);
+}
+# endif /* not HAVE_SIGSETJMP */
+#endif /* USE_SIGNAL_TIMEOUT */
+
+int
+run_with_timeout (long timeout, void (*fun) (void *), void *arg)
+{
+#ifndef USE_SIGNAL_TIMEOUT
+ fun (arg);
+ return 0;
+#else
+ int saved_errno;
+
+ if (timeout == 0)
+ {
+ fun (arg);
+ return 0;
+ }
+
+ signal (SIGALRM, abort_run_with_timeout);
+ if (SETJMP (run_with_timeout_env) != 0)
+ {
+ /* Longjumped out of FUN with a timeout. */
+ signal (SIGALRM, SIG_DFL);
+ return 1;
+ }
+ alarm (timeout);
+ fun (arg);
+
+ /* Preserve errno in case alarm() or signal() modifies it. */
+ saved_errno = errno;
+ alarm (0);
+ signal (SIGALRM, SIG_DFL);
+ errno = saved_errno;
+
+ return 0;
+#endif
+}
+