/* Various utility functions.
- Copyright (C) 2003 Free Software Foundation, Inc.
+ Copyright (C) 2005 Free Software Foundation, Inc.
This file is part of GNU Wget.
return copy;
}
-/* Return a count of how many times CHR occurs in STRING. */
-
-int
-count_char (const char *string, char chr)
-{
- const char *p;
- int count = 0;
- for (p = string; *p; p++)
- if (*p == chr)
- ++count;
- return count;
-}
-
/* Copy the string formed by two pointers (one on the beginning, other
on the char after the last char) to a new, malloc-ed location.
0-terminate it. */
}
return NULL; /* unreached */
}
+
+/* Join all of the string arguments, in the order given, into a
+   single string stored in freshly allocated space, and return it.
+   The argument list must be terminated with a NULL pointer.  */
+
+char *
+concat_strings (const char *str0, ...)
+{
+  va_list ap;
+  /* Lengths of the first few arguments, remembered from the sizing
+     pass so that the copying pass need not call strlen on them
+     again (inspired by Apache's apr_pstrcat).  */
+  int cached_len[5];
+  int ncached = 0;
+  int total = 0;
+  int idx = 0;
+  const char *s;
+  char *result, *dst;
+
+  /* First pass: measure every argument and sum up the lengths.  */
+
+  VA_START (ap, str0);
+  s = str0;
+  while (s != NULL)
+    {
+      int n = strlen (s);
+      if (ncached < countof (cached_len))
+        cached_len[ncached++] = n;
+      total += n;
+      s = va_arg (ap, char *);
+    }
+  va_end (ap);
+
+  /* One extra byte for the terminating zero.  */
+  result = xmalloc (total + 1);
+  dst = result;
+
+  /* Second pass: append each argument to the result.  The arguments
+     are walked in the same order as above, so the Nth cached length
+     belongs to the Nth argument.  */
+
+  VA_START (ap, str0);
+  s = str0;
+  while (s != NULL)
+    {
+      int n;
+      if (idx >= countof (cached_len))
+        n = strlen (s);
+      else
+        n = cached_len[idx++];
+      memcpy (dst, s, n);
+      dst += n;
+      s = va_arg (ap, char *);
+    }
+  va_end (ap);
+  *dst = '\0';
+
+  return result;
+}
\f
/* Return pointer to a static char[] buffer in which zero-terminated
string-representation of TM (in form hh:mm:ss) is printed.
{
pid_t pid;
/* Whether we arrange our own version of opt.lfilename here. */
- int changedp = 0;
+ int logfile_changed = 0;
if (!opt.lfilename)
{
- opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
- changedp = 1;
+ /* We must create the file immediately to avoid either a race
+ condition (which arises from using unique_name and failing to
+ use fopen_excl) or lying to the user about the log file name
+ (which arises from using unique_name, printing the name, and
+ using fopen_excl later on.) */
+ FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, 0, &opt.lfilename);
+ if (new_log_fp)
+ {
+ logfile_changed = 1;
+ fclose (new_log_fp);
+ }
}
pid = fork ();
if (pid < 0)
{
/* parent, no error */
printf (_("Continuing in background, pid %d.\n"), (int)pid);
- if (changedp)
+ if (logfile_changed)
printf (_("Output will be written to `%s'.\n"), opt.lfilename);
exit (0); /* #### should we use _exit()? */
}
exist at the point in time when the function was called.
Therefore, where security matters, don't rely that the file created
by this function exists until you open it with O_EXCL or
- something.
+ equivalent.
If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
string. Otherwise, it may return FILE if the file doesn't exist
and return it. */
return unique_name_1 (file);
}
+
+/* Create a file based on NAME, except without overwriting an existing
+   file with that name.  Provided O_EXCL is correctly implemented,
+   this function does not have the race condition associated with
+   opening the file returned by unique_name.
+
+   BINARY is passed on to fopen_excl.  The return value is the stream
+   opened for writing, or NULL if the file could not be created for a
+   reason other than a name collision; in that case errno is left as
+   set by fopen_excl.
+
+   If OPENED_NAME is non-NULL, *OPENED_NAME is set to the name of the
+   file that was created (a string obtained from unique_name, which
+   the caller then owns), or to NULL if no file was created.  */
+
+FILE *
+unique_create (const char *name, int binary, char **opened_name)
+{
+  /* unique file name, based on NAME */
+  char *uname = unique_name (name, 0);
+  FILE *fp;
+  int saved_errno;
+
+  /* EEXIST means someone else created UNAME between unique_name
+     choosing it and fopen_excl opening it; simply pick another
+     name.  Any other failure ends the loop.  */
+  while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
+    {
+      xfree (uname);
+      uname = unique_name (name, 0);
+    }
+
+  if (fp != NULL && opened_name != NULL)
+    {
+      /* Success, and the caller wants the name: hand over UNAME.  */
+      *opened_name = uname;
+      return fp;
+    }
+
+  /* The name is not being handed out.  Free it without disturbing
+     the errno value the caller will want to inspect on failure.  */
+  saved_errno = errno;
+  xfree (uname);
+  errno = saved_errno;
+
+  /* Reaching this point with a non-NULL OPENED_NAME implies that
+     opening failed; report that no file name is available.  */
+  if (opened_name != NULL)
+    *opened_name = NULL;
+  return fp;
+}
+
+/* Open the file for writing, with the addition that the file is
+   opened "exclusively".  This means that, if the file already exists,
+   this function will *fail* and errno will be set to EEXIST.  If
+   BINARY is set, the file will be opened in binary mode, equivalent
+   to fopen's "wb".
+
+   If opening the file fails for any reason, including the file having
+   previously existed, this function returns NULL and sets errno
+   appropriately.  No file descriptor is left open on failure.  */
+
+FILE *
+fopen_excl (const char *fname, int binary)
+{
+#ifdef O_EXCL
+  FILE *fp;
+  int fd;
+  int flags = O_WRONLY | O_CREAT | O_EXCL;
+# ifdef O_BINARY
+  if (binary)
+    flags |= O_BINARY;
+# endif
+  fd = open (fname, flags, 0666);
+  if (fd < 0)
+    return NULL;
+  fp = fdopen (fd, binary ? "wb" : "w");
+  if (fp == NULL)
+    {
+      /* fdopen failed, so no stream owns FD; close it here to avoid
+         leaking the descriptor, and keep fdopen's errno for the
+         caller.  */
+      int saved_errno = errno;
+      close (fd);
+      errno = saved_errno;
+    }
+  return fp;
+#else  /* not O_EXCL */
+  /* Manually check whether the file exists.  This is prone to race
+     conditions, but systems without O_EXCL haven't deserved
+     better.  */
+  if (file_exists_p (fname))
+    {
+      errno = EEXIST;
+      return NULL;
+    }
+  return fopen (fname, binary ? "wb" : "w");
+#endif /* not O_EXCL */
+}
\f
/* Create DIRECTORY. If some of the pathname components of DIRECTORY
are missing, create them first. In case any mkdir() call fails,
int
make_directory (const char *directory)
{
- int quit = 0;
- int i;
- int ret = 0;
+ int i, ret, quit = 0;
char *dir;
/* Make a copy of dir, to be able to write to it. Otherwise, the
{
int fd;
struct file_memory *fm;
- wgint size;
+ long size;
int inhibit_close = 0;
/* Some magic in the finest tradition of Perl and its kin: if FILE
xfree (v2);
return v1;
}
-
-/* A set of simple-minded routines to store strings in a linked list.
- This used to also be used for searching, but now we have hash
- tables for that. */
-
-/* It's a shame that these simple things like linked lists and hash
- tables (see hash.c) need to be implemented over and over again. It
- would be nice to be able to use the routines from glib -- see
- www.gtk.org for details. However, that would make Wget depend on
- glib, and I want to avoid dependencies to external libraries for
- reasons of convenience and portability (I suspect Wget is more
- portable than anything ever written for Gnome). */
-
-/* Append an element to the list. If the list has a huge number of
- elements, this can get slow because it has to find the list's
- ending. If you think you have to call slist_append in a loop,
- think about calling slist_prepend() followed by slist_nreverse(). */
-
-slist *
-slist_append (slist *l, const char *s)
-{
- slist *newel = xnew (slist);
- slist *beg = l;
-
- newel->string = xstrdup (s);
- newel->next = NULL;
-
- if (!l)
- return newel;
- /* Find the last element. */
- while (l->next)
- l = l->next;
- l->next = newel;
- return beg;
-}
-
-/* Prepend S to the list. Unlike slist_append(), this is O(1). */
-
-slist *
-slist_prepend (slist *l, const char *s)
-{
- slist *newel = xnew (slist);
- newel->string = xstrdup (s);
- newel->next = l;
- return newel;
-}
-
-/* Destructively reverse L. */
-
-slist *
-slist_nreverse (slist *l)
-{
- slist *prev = NULL;
- while (l)
- {
- slist *next = l->next;
- l->next = prev;
- prev = l;
- l = next;
- }
- return prev;
-}
-
-/* Is there a specific entry in the list? */
-int
-slist_contains (slist *l, const char *s)
-{
- for (; l; l = l->next)
- if (!strcmp (l->string, s))
- return 1;
- return 0;
-}
-
-/* Free the whole slist. */
-void
-slist_free (slist *l)
-{
- while (l)
- {
- slist *n = l->next;
- xfree (l->string);
- xfree (l);
- l = n;
- }
-}
\f
/* Sometimes it's useful to create "sets" of strings, i.e. special
hash tables where you want to store strings as keys and merely
return hash_table_contains (ht, s);
}
+/* Mapper used by string_set_to_array.  ARG points to a cursor into
+   the output array: KEY is stored at the cursor, which is then
+   advanced to the next slot.  The value is ignored.  Always returns
+   0.  */
+
+static int
+string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
+{
+  char ***cursor = arg;
+  **cursor = key;
+  ++*cursor;
+  return 0;
+}
+
+/* Convert the specified string set to array. ARRAY should be large
+ enough to hold hash_table_count(ht) char pointers. The pointers
+ stored are the set's own keys, not copies. Elements are stored in
+ whatever order hash_table_map visits the entries. */
+
+void string_set_to_array (struct hash_table *ht, char **array)
+{
+ /* Pass the address of our local copy of ARRAY so that the mapper
+ can advance it as a write cursor. */
+ hash_table_map (ht, string_set_to_array_mapper, &array);
+}
+
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
}
\f
-/* Engine for legible and legible_large_int; add thousand separators
- to numbers printed in strings. */
+/* Add thousand separators to a number already in string form. Used
+ by with_thousand_seps and with_thousand_seps_large. */
static char *
-legible_1 (const char *repr)
+add_thousand_seps (const char *repr)
{
static char outbuf[48];
int i, i1, mod;
return outbuf;
}
-/* Legible -- return a static pointer to the legibly printed wgint. */
+/* Return a static pointer to the number printed with thousand
+ separators inserted at the right places. The pointer is the one
+ returned by add_thousand_seps. */
char *
-legible (wgint l)
+with_thousand_seps (wgint l)
{
char inbuf[24];
/* Print the number into the buffer. */
number_to_string (inbuf, l);
- return legible_1 (inbuf);
+ return add_thousand_seps (inbuf);
}
/* Write a string representation of LARGE_INT NUMBER into the provided
- buffer. The buffer should be able to accept 24 characters,
- including the terminating zero.
+ buffer. BUFSIZE is the size of BUFFER and is passed to snprintf as
+ the output limit.
It would be dangerous to use sprintf, because the code wouldn't
work on a machine with gcc-provided long long support, but without
- libc support for "%lld". However, such platforms will typically
- not have snprintf and will use our version, which does support
- "%lld" where long longs are available. */
+ libc support for "%lld". However, such old platforms typically
+ lack snprintf and will end up using our version, which does
+ support "%lld" wherever long longs are available. */
static void
-large_int_to_string (char *buffer, LARGE_INT number)
+large_int_to_string (char *buffer, int bufsize, LARGE_INT number)
{
- snprintf (buffer, 24, LARGE_INT_FMT, number);
+ snprintf (buffer, bufsize, LARGE_INT_FMT, number);
}
-/* The same as legible(), but works on LARGE_INT. */
+/* The same as with_thousand_seps, but works on LARGE_INT. The
+ number is first printed into a local buffer, and the result of
+ add_thousand_seps on that buffer is returned. */
char *
-legible_large_int (LARGE_INT l)
+with_thousand_seps_large (LARGE_INT l)
{
char inbuf[48];
- large_int_to_string (inbuf, l);
- return legible_1 (inbuf);
+ large_int_to_string (inbuf, sizeof (inbuf), l);
+ return add_thousand_seps (inbuf);
+}
+
+/* Convert N, a byte quantity, to a human-readable abbreviated form
+   a la the sizes printed by `ls -lh'.  The result is written to a
+   static buffer, a pointer to which is returned; the buffer is
+   overwritten by the next call.
+
+   Unlike `with_thousand_seps', this approximates to the nearest unit.
+   Quoting GNU libit: "Most people visually process strings of 3-4
+   digits effectively, but longer strings of digits are more prone to
+   misinterpretation.  Hence, converting to an abbreviated form
+   usually improves readability."
+
+   This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
+   original computer science meaning of "multiples of 1024".
+   Multiples of 1000 would be useless since Wget already adds thousand
+   separators for legibility.  We don't use the "*bibyte" names
+   invented in 1998, and seldom used in practice.  Wikipedia's entry
+   on kilobyte discusses this in some detail.  */
+
+char *
+human_readable (wgint n)
+{
+  /* These suffixes are compatible with those of GNU `ls -lh'.  */
+  static char powers[] =
+    {
+      'K',			/* kilobyte, 2^10 bytes */
+      'M',			/* megabyte, 2^20 bytes */
+      'G',			/* gigabyte, 2^30 bytes */
+      'T',			/* terabyte, 2^40 bytes */
+      'P',			/* petabyte, 2^50 bytes */
+      'E',			/* exabyte,  2^60 bytes */
+    };
+  static char buf[8];
+  int last = countof (powers) - 1;
+  int idx;
+
+  /* Quantities below 1K are printed as they are, without a suffix.  */
+  if (n < 1024)
+    {
+      snprintf (buf, sizeof (buf), "%d", (int) n);
+      return buf;
+    }
+
+  /* Walk up the units, shifting N right by ten bits (i.e. dividing
+     it by 1024) at each step.  This works unchanged for all sizes of
+     wgint, while still avoiding non-portable `long double'
+     arithmetic.  */
+  for (idx = 0; idx <= last; idx++, n >>= 10)
+    {
+      double val;
+
+      /* N is expressed in the unit *below* powers[idx].  Keep going
+	 while N would still be 1024 or more in this unit, unless
+	 there is no larger unit left to try.  */
+      if (idx != last && (n >> 10) >= 1024)
+	continue;
+
+      /* N/1024.0 is the quantity in the units of powers[idx].  Must
+	 cast to long first because MS VC can't directly cast __int64
+	 to double.  (This is safe because N is known to be small at
+	 this point: below 2**20 unless the largest unit is used.)  */
+      val = (double) (long) n / 1024.0;
+
+      /* Print values smaller than 10 with one decimal digit, and
+	 others without any decimals.  */
+      snprintf (buf, sizeof (buf), "%.*f%c",
+		val < 10 ? 1 : 0, val, powers[idx]);
+      return buf;
+    }
+  return NULL;			/* unreached */
}
-/* Count the digits in an integer number. */
+/* Count the characters needed to print NUMBER in base 10: one per
+ digit, plus one for the leading '-' when NUMBER is negative. Used
+ to allocate space when printing numbers. */
+
int
numdigit (wgint number)
{
int cnt = 1;
if (number < 0)
- {
- number = -number;
- ++cnt;
- }
- while ((number /= 10) > 0)
+ ++cnt; /* accommodate '-' */
+ /* Dividing until the quotient reaches zero works for negative
+ values as well, so NUMBER never has to be negated. */
+ while ((number /= 10) != 0)
++cnt;
return cnt;
}
# define C100000000000000000 100000000000000000LL
# define C1000000000000000000 1000000000000000000LL
# else
-# if defined(_MSC_VER) || defined(__WATCOM__)
-/* Otherwise, if __int64 is available (under Windows), use __int64
- constants. */
+# if defined(WINDOWS)
+/* Use __int64 constants under Windows. */
# define C10000000000 10000000000I64
# define C100000000000 100000000000I64
# define C1000000000000 1000000000000I64
# if SIZEOF_LONG_LONG >= SIZEOF_WGINT
# define SPRINTF_WGINT(buf, n) sprintf(buf, "%lld", (long long) (n))
# else
-# ifdef _MSC_VER
+# ifdef WINDOWS
# define SPRINTF_WGINT(buf, n) sprintf(buf, "%I64", (__int64) (n))
# endif
# endif
#endif
/* Print NUMBER to BUFFER in base 10. This is equivalent to
- `sprintf(buffer, "%lld", (long long) number)', only much faster and
- portable to machines without long long.
+ `sprintf(buffer, "%lld", (long long) number)', only typically much
+ faster and portable to machines without long long.
The speedup may make a difference in programs that frequently
convert numbers to strings. Some implementations of sprintf,
only one of the above constants will be defined. Virtually all
modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
- non-Windows systems without gettimeofday.
-
- #### Perhaps we should also support ftime(), which exists on old
- BSD 4.2-influenced systems? (It also existed under MS DOS Borland
- C, if memory serves me.) */
+ non-Windows systems without gettimeofday. */
#ifdef WINDOWS
# define TIMER_WINDOWS
#endif
#ifdef TIMER_WINDOWS
-typedef ULARGE_INTEGER wget_sys_time;
+typedef union {
+ DWORD lores; /* In case GetTickCount is used */
+ LARGE_INTEGER hires; /* In case high-resolution timer is used */
+} wget_sys_time;
#endif
struct wget_timer {
double elapsed_pre_start;
};
+#ifdef TIMER_WINDOWS
+
+/* Whether high-resolution timers are used. Set by wtimer_initialize_once
+ the first time wtimer_allocate is called. */
+static int using_hires_timers;
+
+/* Frequency of high-resolution timers -- number of updates per
+ millisecond. Calculated the first time wtimer_allocate is called
+ provided that high-resolution timers are available. */
+static double hires_millisec_freq;
+
+/* Decide, on the first call only, whether high-resolution timers
+   are to be used.  If QueryPerformanceFrequency reports a non-zero
+   frequency, set using_hires_timers and store the frequency in
+   updates per millisecond in hires_millisec_freq.  Later calls do
+   nothing.  */
+
+static void
+wtimer_initialize_once (void)
+{
+  static int init_done;
+  LARGE_INTEGER freq;
+
+  if (init_done)
+    return;
+  init_done = 1;
+
+  /* Start from zero so that a call which leaves FREQ untouched is
+     treated the same as one reporting a zero frequency.  */
+  freq.QuadPart = 0;
+  QueryPerformanceFrequency (&freq);
+  if (freq.QuadPart == 0)
+    return;
+
+  using_hires_timers = 1;
+  /* The reported frequency is divided by 1000 to get updates per
+     millisecond.  */
+  hires_millisec_freq = (double) freq.QuadPart / 1000.0;
+}
+#endif /* TIMER_WINDOWS */
+
/* Allocate a timer. Calling wtimer_read on the timer will return
zero. It is not legal to call wtimer_update with a freshly
allocated timer -- use wtimer_reset first. */
{
struct wget_timer *wt = xnew (struct wget_timer);
xzero (*wt);
+
+#ifdef TIMER_WINDOWS
+ wtimer_initialize_once ();
+#endif
+
return wt;
}
#endif
#ifdef TIMER_WINDOWS
- /* We use GetSystemTime to get the elapsed time. MSDN warns that
- system clock adjustments can skew the output of GetSystemTime
- when used as a timer and gives preference to GetTickCount and
- high-resolution timers. But GetTickCount can overflow, and hires
- timers are typically used for profiling, not for regular time
- measurement. Since we handle clock skew anyway, we just use
- GetSystemTime. */
- FILETIME ft;
- SYSTEMTIME st;
- GetSystemTime (&st);
-
- /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
- FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
- arithmetic on that. */
- SystemTimeToFileTime (&st, &ft);
- wst->HighPart = ft.dwHighDateTime;
- wst->LowPart = ft.dwLowDateTime;
+ if (using_hires_timers)
+ {
+ QueryPerformanceCounter (&wst->hires);
+ }
+ else
+ {
+ /* Where hires counters are not available, use GetTickCount rather
+ than GetSystemTime, because it is unaffected by clock skew and
+ simpler to use. Note that overflows don't affect us because we
+ never use absolute values of the ticker, only the differences. */
+ wst->lores = GetTickCount ();
+ }
#endif
}
/* Reset timer WT. This establishes the starting point from which
wtimer_elapsed() will return the number of elapsed milliseconds.
- It is allowed to reset a previously used timer.
-
- If a non-zero value is used as START, the timer's values will be
- offset by START. */
+ It is allowed to reset a previously used timer. */
void
wtimer_reset (struct wget_timer *wt)
#endif
#ifdef WINDOWS
- /* VC++ 6 doesn't support direct cast of uint64 to double. To work
- around this, we subtract, then convert to signed, then finally to
- double. */
- return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
+ if (using_hires_timers)
+ return (wst1->hires.QuadPart - wst2->hires.QuadPart) / hires_millisec_freq;
+ else
+ return wst1->lores - wst2->lores;
#endif
}
#endif
#ifdef TIMER_WINDOWS
- /* According to MSDN, GetSystemTime returns a broken-down time
- structure the smallest member of which are milliseconds. */
- return 1;
+ if (using_hires_timers)
+ return 1.0 / hires_millisec_freq;
+ else
+ return 10; /* according to MSDN */
#endif
}
\f
int rnd3 = random_number (1000);
return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
}
-
-#if 0
-/* A debugging function for checking whether an MD5 library works. */
-
-#include "gen-md5.h"
-
-char *
-debug_test_md5 (char *buf)
-{
- unsigned char raw[16];
- static char res[33];
- unsigned char *p1;
- char *p2;
- int cnt;
- ALLOCA_MD5_CONTEXT (ctx);
-
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
- gen_md5_finish (ctx, raw);
-
- p1 = raw;
- p2 = res;
- cnt = 16;
- while (cnt--)
- {
- *p2++ = XNUM_TO_digit (*p1 >> 4);
- *p2++ = XNUM_TO_digit (*p1 & 0xf);
- ++p1;
- }
- *p2 = '\0';
-
- return res;
-}
-#endif
\f
/* Implementation of run_with_timeout, a generic timeout-forcing
routine for systems with Unix-like signal handling. */
#ifdef HAVE_NANOSLEEP
/* nanosleep is the preferred interface because it offers high
accuracy and, more importantly, because it allows us to reliably
- restart after having been interrupted by a signal such as
- SIGWINCH. */
+ restart after receiving a signal such as SIGWINCH. (There was an
+ actual Debian bug report about --limit-rate malfunctioning while
+ the terminal was being resized.) */
struct timespec sleep, remaining;
sleep.tv_sec = (long) seconds;
sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);