/* Various utility functions.
- Copyright (C) 2003 Free Software Foundation, Inc.
+ Copyright (C) 2005 Free Software Foundation, Inc.
This file is part of GNU Wget.
return copy;
}
-/* Return a count of how many times CHR occurs in STRING. */
-
-int
-count_char (const char *string, char chr)
-{
- const char *p;
- int count = 0;
- for (p = string; *p; p++)
- if (*p == chr)
- ++count;
- return count;
-}
-
/* Copy the string formed by two pointers (one on the beginning, other
on the char after the last char) to a new, malloc-ed location.
0-terminate it. */
return NULL;
return fdopen (fd, binary ? "wb" : "w");
#else /* not O_EXCL */
+ /* Manually check whether the file exists. This is prone to race
+ conditions, but systems without O_EXCL haven't deserved
+ better. */
+ if (file_exists_p (fname))
+ {
+ errno = EEXIST;
+ return NULL;
+ }
return fopen (fname, binary ? "wb" : "w");
#endif /* not O_EXCL */
}
xfree (v2);
return v1;
}
-
-/* A set of simple-minded routines to store strings in a linked list.
- This used to also be used for searching, but now we have hash
- tables for that. */
-
-/* It's a shame that these simple things like linked lists and hash
- tables (see hash.c) need to be implemented over and over again. It
- would be nice to be able to use the routines from glib -- see
- www.gtk.org for details. However, that would make Wget depend on
- glib, and I want to avoid dependencies to external libraries for
- reasons of convenience and portability (I suspect Wget is more
- portable than anything ever written for Gnome). */
-
-/* Append an element to the list. If the list has a huge number of
- elements, this can get slow because it has to find the list's
- ending. If you think you have to call slist_append in a loop,
- think about calling slist_prepend() followed by slist_nreverse(). */
-
-slist *
-slist_append (slist *l, const char *s)
-{
- slist *newel = xnew (slist);
- slist *beg = l;
-
- newel->string = xstrdup (s);
- newel->next = NULL;
-
- if (!l)
- return newel;
- /* Find the last element. */
- while (l->next)
- l = l->next;
- l->next = newel;
- return beg;
-}
-
-/* Prepend S to the list. Unlike slist_append(), this is O(1). */
-
-slist *
-slist_prepend (slist *l, const char *s)
-{
- slist *newel = xnew (slist);
- newel->string = xstrdup (s);
- newel->next = l;
- return newel;
-}
-
-/* Destructively reverse L. */
-
-slist *
-slist_nreverse (slist *l)
-{
- slist *prev = NULL;
- while (l)
- {
- slist *next = l->next;
- l->next = prev;
- prev = l;
- l = next;
- }
- return prev;
-}
-
-/* Is there a specific entry in the list? */
-int
-slist_contains (slist *l, const char *s)
-{
- for (; l; l = l->next)
- if (!strcmp (l->string, s))
- return 1;
- return 0;
-}
-
-/* Free the whole slist. */
-void
-slist_free (slist *l)
-{
- while (l)
- {
- slist *n = l->next;
- xfree (l->string);
- xfree (l);
- l = n;
- }
-}
\f
/* Sometimes it's useful to create "sets" of strings, i.e. special
hash tables where you want to store strings as keys and merely
return hash_table_contains (ht, s);
}
+static int
+string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
+{
+ char ***arrayptr = (char ***) arg;
+ *(*arrayptr)++ = (char *) key;
+ return 0;
+}
+
+/* Convert the specified string set to array. ARRAY should be large
+ enough to hold hash_table_count(ht) char pointers. */
+
+void string_set_to_array (struct hash_table *ht, char **array)
+{
+ hash_table_map (ht, string_set_to_array_mapper, &array);
+}
+
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
}
\f
-/* Engine for legible and legible_large_int; add thousand separators
- to numbers printed in strings. */
+/* Add thousand separators to a number already in string form. Used
+ by with_thousand_seps and with_thousand_seps_large. */
static char *
-legible_1 (const char *repr)
+add_thousand_seps (const char *repr)
{
static char outbuf[48];
int i, i1, mod;
return outbuf;
}
-/* Legible -- return a static pointer to the legibly printed wgint. */
+/* Return a static pointer to the number printed with thousand
+ separators inserted at the right places. */
char *
-legible (wgint l)
+with_thousand_seps (wgint l)
{
char inbuf[24];
/* Print the number into the buffer. */
number_to_string (inbuf, l);
- return legible_1 (inbuf);
+ return add_thousand_seps (inbuf);
}
/* Write a string representation of LARGE_INT NUMBER into the provided
- buffer. The buffer should be able to accept 24 characters,
- including the terminating zero.
+ buffer.
It would be dangerous to use sprintf, because the code wouldn't
work on a machine with gcc-provided long long support, but without
- libc support for "%lld". However, such platforms will typically
- not have snprintf and will use our version, which does support
- "%lld" where long longs are available. */
+ libc support for "%lld". However, such old systems platforms
+ typically lack snprintf and will end up using our version, which
+ does support "%lld" whereever long longs are available. */
static void
-large_int_to_string (char *buffer, LARGE_INT number)
+large_int_to_string (char *buffer, int bufsize, LARGE_INT number)
{
- snprintf (buffer, 24, LARGE_INT_FMT, number);
+ snprintf (buffer, bufsize, LARGE_INT_FMT, number);
}
-/* The same as legible(), but works on LARGE_INT. */
+/* The same as with_thousand_seps, but works on LARGE_INT. */
char *
-legible_large_int (LARGE_INT l)
+with_thousand_seps_large (LARGE_INT l)
{
char inbuf[48];
- large_int_to_string (inbuf, l);
- return legible_1 (inbuf);
+ large_int_to_string (inbuf, sizeof (inbuf), l);
+ return add_thousand_seps (inbuf);
+}
+
+/* N, a byte quantity, is converted to a human-readable abberviated
+ form a la sizes printed by `ls -lh'. The result is written to a
+ static buffer, a pointer to which is returned.
+
+ Unlike `with_thousand_seps', this approximates to the nearest unit.
+ Quoting GNU libit: "Most people visually process strings of 3-4
+ digits effectively, but longer strings of digits are more prone to
+ misinterpretation. Hence, converting to an abbreviated form
+ usually improves readability."
+
+ This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
+ original computer science meaning of "multiples of 1024".
+ Multiples of 1000 would be useless since Wget already adds thousand
+ separators for legibility. We don't use the "*bibyte" names
+ invented in 1998, and seldom used in practice. Wikipedia's entry
+ on kilobyte discusses this in some detail. */
+
+char *
+human_readable (wgint n)
+{
+ /* These suffixes are compatible with those of GNU `ls -lh'. */
+ static char powers[] =
+ {
+ 'K', /* kilobyte, 2^10 bytes */
+ 'M', /* megabyte, 2^20 bytes */
+ 'G', /* gigabyte, 2^30 bytes */
+ 'T', /* terabyte, 2^40 bytes */
+ 'P', /* petabyte, 2^50 bytes */
+ 'E', /* exabyte, 2^60 bytes */
+ };
+ static char buf[8];
+ int i;
+
+ /* If the quantity is smaller than 1K, just print it. */
+ if (n < 1024)
+ {
+ snprintf (buf, sizeof (buf), "%d", (int) n);
+ return buf;
+ }
+
+ /* Loop over powers, dividing N with 1024 in each iteration. This
+ works unchanged for all sizes of wgint, while still avoiding
+ non-portable `long double' arithmetic. */
+ for (i = 0; i < countof (powers); i++)
+ {
+ /* At each iteration N is greater than the *subsequent* power.
+ That way N/1024.0 produces a decimal number in the units of
+ *this* power. */
+ if ((n >> 10) < 1024 || i == countof (powers) - 1)
+ {
+ /* Must cast to long first because MS VC can't directly cast
+ __int64 to double. (This is safe because N is known to
+ be <2**20.) */
+ double val = (double) (long) n / 1024.0;
+ /* Print values smaller than 10 with one decimal digits, and
+ others without any decimals. */
+ snprintf (buf, sizeof (buf), "%.*f%c",
+ val < 10 ? 1 : 0, val, powers[i]);
+ return buf;
+ }
+ n >>= 10;
+ }
+ return NULL; /* unreached */
}
-/* Count the digits in an integer number. */
+/* Count the digits in the provided number. Used to allocate space
+ when printing numbers. */
+
int
numdigit (wgint number)
{
int cnt = 1;
if (number < 0)
- {
- number = -number;
- ++cnt;
- }
- while ((number /= 10) > 0)
+ ++cnt; /* accomodate '-' */
+ while ((number /= 10) != 0)
++cnt;
return cnt;
}
#endif
/* Print NUMBER to BUFFER in base 10. This is equivalent to
- `sprintf(buffer, "%lld", (long long) number)', only much faster and
- portable to machines without long long.
+ `sprintf(buffer, "%lld", (long long) number)', only typically much
+ faster and portable to machines without long long.
The speedup may make a difference in programs that frequently
convert numbers to strings. Some implementations of sprintf,
only one of the above constants will be defined. Virtually all
modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
- non-Windows systems without gettimeofday.
-
- #### Perhaps we should also support ftime(), which exists on old
- BSD 4.2-influenced systems? (It also existed under MS DOS Borland
- C, if memory serves me.) */
+ non-Windows systems without gettimeofday. */
#ifdef WINDOWS
# define TIMER_WINDOWS
#endif
#ifdef TIMER_WINDOWS
-typedef ULARGE_INTEGER wget_sys_time;
+typedef union {
+ DWORD lores; /* In case GetTickCount is used */
+ LARGE_INTEGER hires; /* In case high-resolution timer is used */
+} wget_sys_time;
#endif
struct wget_timer {
double elapsed_pre_start;
};
+#ifdef TIMER_WINDOWS
+
+/* Whether high-resolution timers are used. Set by wtimer_initialize_once
+ the first time wtimer_allocate is called. */
+static int using_hires_timers;
+
+/* Frequency of high-resolution timers -- number of updates per
+ millisecond. Calculated the first time wtimer_allocate is called
+ provided that high-resolution timers are available. */
+static double hires_millisec_freq;
+
+/* The first time a timer is created, determine whether to use
+ high-resolution timers. */
+
+static void
+wtimer_initialize_once (void)
+{
+ static int init_done;
+ if (!init_done)
+ {
+ LARGE_INTEGER freq;
+ init_done = 1;
+ freq.QuadPart = 0;
+ QueryPerformanceFrequency (&freq);
+ if (freq.QuadPart != 0)
+ {
+ using_hires_timers = 1;
+ hires_millisec_freq = (double) freq.QuadPart / 1000.0;
+ }
+ }
+}
+#endif /* TIMER_WINDOWS */
+
/* Allocate a timer. Calling wtimer_read on the timer will return
zero. It is not legal to call wtimer_update with a freshly
allocated timer -- use wtimer_reset first. */
{
struct wget_timer *wt = xnew (struct wget_timer);
xzero (*wt);
+
+#ifdef TIMER_WINDOWS
+ wtimer_initialize_once ();
+#endif
+
return wt;
}
#endif
#ifdef TIMER_WINDOWS
- /* We use GetSystemTime to get the elapsed time. MSDN warns that
- system clock adjustments can skew the output of GetSystemTime
- when used as a timer and gives preference to GetTickCount and
- high-resolution timers. But GetTickCount can overflow, and hires
- timers are typically used for profiling, not for regular time
- measurement. Since we handle clock skew anyway, we just use
- GetSystemTime. */
- FILETIME ft;
- SYSTEMTIME st;
- GetSystemTime (&st);
-
- /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
- FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
- arithmetic on that. */
- SystemTimeToFileTime (&st, &ft);
- wst->HighPart = ft.dwHighDateTime;
- wst->LowPart = ft.dwLowDateTime;
+ if (using_hires_timers)
+ {
+ QueryPerformanceCounter (&wst->hires);
+ }
+ else
+ {
+ /* Where hires counters are not available, use GetTickCount rather
+ GetSystemTime, because it is unaffected by clock skew and simpler
+ to use. Note that overflows don't affect us because we never use
+ absolute values of the ticker, only the differences. */
+ wst->lores = GetTickCount ();
+ }
#endif
}
/* Reset timer WT. This establishes the starting point from which
wtimer_elapsed() will return the number of elapsed milliseconds.
- It is allowed to reset a previously used timer.
-
- If a non-zero value is used as START, the timer's values will be
- offset by START. */
+ It is allowed to reset a previously used timer. */
void
wtimer_reset (struct wget_timer *wt)
#endif
#ifdef WINDOWS
- /* VC++ 6 doesn't support direct cast of uint64 to double. To work
- around this, we subtract, then convert to signed, then finally to
- double. */
- return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
+ if (using_hires_timers)
+ return (wst1->hires.QuadPart - wst2->hires.QuadPart) / hires_millisec_freq;
+ else
+ return wst1->lores - wst2->lores;
#endif
}
#endif
#ifdef TIMER_WINDOWS
- /* According to MSDN, GetSystemTime returns a broken-down time
- structure the smallest member of which are milliseconds. */
- return 1;
+ if (using_hires_timers)
+ return 1.0 / hires_millisec_freq;
+ else
+ return 10; /* according to MSDN */
#endif
}
\f
int rnd3 = random_number (1000);
return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
}
-
-#if 0
-/* A debugging function for checking whether an MD5 library works. */
-
-#include "gen-md5.h"
-
-char *
-debug_test_md5 (char *buf)
-{
- unsigned char raw[16];
- static char res[33];
- unsigned char *p1;
- char *p2;
- int cnt;
- ALLOCA_MD5_CONTEXT (ctx);
-
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
- gen_md5_finish (ctx, raw);
-
- p1 = raw;
- p2 = res;
- cnt = 16;
- while (cnt--)
- {
- *p2++ = XNUM_TO_digit (*p1 >> 4);
- *p2++ = XNUM_TO_digit (*p1 & 0xf);
- ++p1;
- }
- *p2 = '\0';
-
- return res;
-}
-#endif
\f
/* Implementation of run_with_timeout, a generic timeout-forcing
routine for systems with Unix-like signal handling. */
#ifdef HAVE_NANOSLEEP
/* nanosleep is the preferred interface because it offers high
accuracy and, more importantly, because it allows us to reliably
- restart after having been interrupted by a signal such as
- SIGWINCH. */
+ restart receiving a signal such as SIGWINCH. (There was an
+ actual Debian bug report about --limit-rate malfunctioning while
+ the terminal was being resized.) */
struct timespec sleep, remaining;
sleep.tv_sec = (long) seconds;
sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);