/* Various utility functions.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
#ifdef HAVE_MMAP
# include <sys/mman.h>
#endif
#ifdef HAVE_PROCESS_H
# include <process.h> /* getpid() */
#endif
-#ifdef HAVE_UTIME_H
-# include <utime.h>
-#endif
-#ifdef HAVE_SYS_UTIME_H
-# include <sys/utime.h>
-#endif
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <stdarg.h>
#include <locale.h>
-/* For TIOCGWINSZ and friends: */
-#ifdef HAVE_SYS_IOCTL_H
-# include <sys/ioctl.h>
+#if HAVE_UTIME
+# include <sys/types.h>
+# ifdef HAVE_UTIME_H
+# include <utime.h>
+# endif
+
+# ifdef HAVE_SYS_UTIME_H
+# include <sys/utime.h>
+# endif
#endif
+
+#include <sys/time.h>
+
+#include <sys/stat.h>
+
+/* For TIOCGWINSZ and friends: */
+#include <sys/ioctl.h>
#ifdef HAVE_TERMIOS_H
# include <termios.h>
#endif
#include <signal.h>
#include <setjmp.h>
+#include <regex.h>
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
+
#ifndef HAVE_SIGSETJMP
/* If sigsetjmp is a macro, configure won't pick it up. */
# ifdef sigsetjmp
#ifdef TESTING
#include "test.h"
-#endif
+#endif
+
+/* Report a failed allocation made on behalf of CONTEXT and terminate
+   the program with exit status 1.  ATTEMPTED_SIZE is the requested
+   byte count, or UNKNOWN_ATTEMPTED_SIZE when the caller cannot
+   tell.  */
+static void
+memfatal (const char *context, long attempted_size)
+{
+  /* Storing log lines for later would itself call malloc, which is
+     the last thing we want while reporting an allocation failure.  */
+  log_set_save_context (false);
+
+  /* The byte count is only mentioned when the caller supplied one.  */
+  if (attempted_size != UNKNOWN_ATTEMPTED_SIZE)
+    logprintf (LOG_ALWAYS,
+               _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
+               exec_name, context, attempted_size);
+  else
+    logprintf (LOG_ALWAYS,
+               _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
+               exec_name, context);
+
+  exit (1);
+}
/* Character property table for (re-)escaping VMS ODS5 extended file
names. Note that this table ignores Unicode.
Vertical bar (|)
Characters escaped by "^":
- SP ! # % & ' ( ) + , . ; = @ [ ] ^ ` { } ~
+ SP ! " # % & ' ( ) + , . : ; =
+ @ [ \ ] ^ ` { | } ~
Either "^_" or "^ " is accepted as a space. Period (.) is a special
case. Note that un-escaped < and > can also confuse a directory
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* SP ! " # $ % & ' ( ) * + , - . / */
- 2, 1, 0, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
+ 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
- 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 0, 1, 1, 1, 1, 1,
+ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1,
/* @ A B C D E F G H I J K L M N O */
1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 0, 1, 1, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16,
/* ` a b c d e f g h i j k l m n o */
1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* p q r s t u v w x y z { | } ~ DEL */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 0, 1, 17, 8,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
fallback implementation of vsnprintf, this should be portable. */
/* Constant is using for limits memory allocation for text buffer.
- Applicable in situation when: vasprintf is not available in the system
+ Applicable in situation when: vasprintf is not available in the system
and vsnprintf return -1 when long line is truncated (in old versions of
glibc and in other system where C99 doesn`t support) */
else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */
{ /* maybe we have some wrong
format string? */
- logprintf (LOG_ALWAYS,
+ logprintf (LOG_ALWAYS,
_("%s: aprintf: text buffer is too big (%ld bytes), "
"aborting.\n"),
exec_name, size); /* printout a log message */
const char *next_str;
int total_length = 0;
- int argcount;
+ size_t argcount;
/* Calculate the length of and allocate the resulting string. */
/* parent, no error */
printf (_("Continuing in background, pid %d.\n"), (int) pid);
if (logfile_changed)
- printf (_("Output will be written to `%s'.\n"), opt.lfilename);
+ printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
exit (0); /* #### should we use _exit()? */
}
/* child: give up the privileges and keep running. */
setsid ();
- freopen ("/dev/null", "r", stdin);
- freopen ("/dev/null", "w", stdout);
- freopen ("/dev/null", "w", stderr);
+ if (freopen ("/dev/null", "r", stdin) == NULL)
+ DEBUGP (("Failed to redirect stdin to /dev/null.\n"));
+ if (freopen ("/dev/null", "w", stdout) == NULL)
+ DEBUGP (("Failed to redirect stdout to /dev/null.\n"));
+ if (freopen ("/dev/null", "w", stderr) == NULL)
+ DEBUGP (("Failed to redirect stderr to /dev/null.\n"));
}
#endif /* !WINDOWS && !MSDOS */
void
touch (const char *file, time_t tm)
{
-#ifdef HAVE_STRUCT_UTIMBUF
+#if HAVE_UTIME
+# ifdef HAVE_STRUCT_UTIMBUF
struct utimbuf times;
-#else
+# else
struct {
time_t actime;
time_t modtime;
} times;
-#endif
+# endif
times.modtime = tm;
times.actime = time (NULL);
if (utime (file, &times) == -1)
logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
+#else
+ /* No utime(): set the timestamps through an open descriptor. */
+ struct timespec timespecs[2];
+ int fd;
+
+ fd = open (file, O_WRONLY);
+ if (fd < 0)
+ {
+ logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
+ return;
+ }
+
+ /* Element 0 is the access time (now), element 1 the modification
+ time (TM), mirroring actime/modtime in the utime() branch. */
+ timespecs[0].tv_sec = time (NULL);
+ timespecs[0].tv_nsec = 0L;
+ timespecs[1].tv_sec = tm;
+ timespecs[1].tv_nsec = 0L;
+
+ if (futimens (fd, timespecs) == -1)
+ logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
+
+ close (fd);
+#endif
}
/* Checks if FILE is a symbolic link, and removes it if it is. Does
DEBUGP (("Unlinking %s (symlink).\n", file));
err = unlink (file);
if (err != 0)
- logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
- file, strerror (errno));
+ logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
+ quote (file), strerror (errno));
}
return err;
}
xfree (uname);
uname = unique_name (name, false);
}
- if (opened_name && fp != NULL)
+ if (opened_name)
{
if (fp)
*opened_name = uname;
If opening the file fails for any reason, including the file having
previously existed, this function returns NULL and sets errno
appropriately. */
-
+
FILE *
-fopen_excl (const char *fname, bool binary)
+fopen_excl (const char *fname, int binary)
{
int fd;
#ifdef O_EXCL
open_id = 13;
fd = open( fname, /* File name. */
flags, /* Flags. */
- 0777, /* Mode for default protection.
-*/
+ 0777, /* Mode for default protection. */
"rfm=stmlf", /* Stream_LF. */
OPEN_OPT_ARGS); /* Access callback. */
}
bool
acceptable (const char *s)
{
- int l = strlen (s);
+ const char *p;
+
+ if (opt.output_document && strcmp (s, opt.output_document) == 0)
+ return true;
+
+ if ((p = strrchr (s, '/')))
+ s = p + 1;
- while (l && s[l] != '/')
- --l;
- if (s[l] == '/')
- s += (l + 1);
if (opt.accepts)
{
if (opt.rejects)
}
else if (opt.rejects)
return !in_acclist ((const char *const *)opt.rejects, s, true);
+
+ return true;
+}
+
+/* Decide whether URL S may be followed according to the regex filters.
+   Returns false when opt.acceptregex is set and opt.regex_match_fun
+   reports no match for S against it, or when opt.rejectregex is set
+   and opt.regex_match_fun reports a match; returns true otherwise,
+   including when neither pattern is set.  */
+bool
+accept_url (const char *s)
+{
+ if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
+ return false;
+ if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
+ return false;
+
return true;
}
/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
- will return true if and only if D2 begins with `/something/' or is exactly
+ will return true if and only if D2 begins with `/something/' or is exactly
'/something'. */
bool
subdir_p (const char *d1, const char *d2)
else
for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
;
-
+
return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
}
first element that matches DIR, through wildcards or front comparison (as
appropriate). */
static bool
-dir_matches_p (char **dirlist, const char *dir)
+dir_matches_p (const char **dirlist, const char *dir)
{
- char **x;
+ const char **x;
int (*matcher) (const char *, const char *, int)
= opt.ignore_case ? fnmatch_nocase : fnmatch;
for (x = dirlist; *x; x++)
{
/* Remove leading '/' */
- char *p = *x + (**x == '/');
+ const char *p = *x + (**x == '/');
if (has_wildcards_p (p))
{
if (matcher (p, dir, FNM_PATHNAME) == 0)
break;
}
}
-
+
return *x ? true : false;
}
bool
match_tail (const char *string, const char *tail, bool fold_case)
{
- int i, j;
+ size_t string_len = strlen (string);
+ size_t tail_len = strlen (tail);
- /* We want this to be fast, so we code two loops, one with
- case-folding, one without. */
+ /* Compare the lengths as size_t; subtracting first and storing the
+ difference in an int relies on unsigned wraparound and narrowing. */
+ if (tail_len > string_len)
+ return false; /* tail is longer than string. */
+
+ /* Only the last strlen (tail) characters of STRING can match. */
+ string += string_len - tail_len;
if (!fold_case)
- {
- for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
- if (string[i] != tail[j])
- break;
- }
+ return !strcmp (string, tail);
else
- {
- for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
- if (c_tolower (string[i]) != c_tolower (tail[j]))
- break;
- }
-
- /* If the tail was exhausted, the match was succesful. */
- if (j == -1)
- return true;
- else
- return false;
+ return !strcasecmp (string, tail);
}
/* Checks whether string S matches each element of ACCEPTS. A list
char *
suffix (const char *str)
{
- int i;
+ char *dot = strrchr (str, '.');
- for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
- ;
+ /* No dot at all, or a slash follows the last dot, so that dot is
+ part of a directory name rather than a file suffix. */
+ if (dot == NULL || strchr (dot + 1, '/') != NULL)
+ return NULL;
- if (str[i++] == '.')
- return (char *)str + i;
- else
- return NULL;
+ return dot + 1;
}
/* Return true if S contains globbing wildcards (`*', `?', `[' or
bool
has_wildcards_p (const char *s)
{
- for (; *s; s++)
- if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
- return true;
- return false;
+ return strpbrk (s, "*?[]") != NULL;
}
/* Return true if FNAME ends with a typical HTML suffix. The
following (case-insensitive) suffixes are presumed to be HTML
files:
-
+
html
htm
?html (`?' matches one character)
return false;
}
-/* Read a line from FP and return the pointer to freshly allocated
- storage. The storage space is obtained through malloc() and should
- be freed with free() when it is no longer needed.
-
- The length of the line is not limited, except by available memory.
- The newline character at the end of line is retained. The line is
- terminated with a zero character.
-
- After end-of-file is encountered without anything being read, NULL
- is returned. NULL is also returned on error. To distinguish
- between these two cases, use the stdio function ferror(). */
-
-char *
-read_whole_line (FILE *fp)
-{
- int length = 0;
- int bufsize = 82;
- char *line = xmalloc (bufsize);
-
- while (fgets (line + length, bufsize - length, fp))
- {
- length += strlen (line + length);
- if (length == 0)
- /* Possible for example when reading from a binary file where
- a line begins with \0. */
- continue;
-
- if (line[length - 1] == '\n')
- break;
-
- /* fgets() guarantees to read the whole line, or to use up the
- space we've given it. We can double the buffer
- unconditionally. */
- bufsize <<= 1;
- line = xrealloc (line, bufsize);
- }
- if (length == 0 || ferror (fp))
- {
- xfree (line);
- return NULL;
- }
- if (length + 1 < bufsize)
- /* Relieve the memory from our exponential greediness. We say
- `length + 1' because the terminating \0 is not included in
- LENGTH. We don't need to zero-terminate the string ourselves,
- though, because fgets() does that. */
- line = xrealloc (line, length + 1);
- return line;
-}
-\f
/* Read FILE into memory. A pointer to `struct file_memory' are
returned; use struct element `content' to access file contents, and
the element `length' to know the file length. `content' is *not*
zero-terminated, and you should *not* read or write beyond the [0,
length) range of characters.
- After you are done with the file contents, call read_file_free to
+ After you are done with the file contents, call wget_read_file_free to
release the memory.
Depending on the operating system and the type of file that is
- being read, read_file() either mmap's the file into memory, or
+ being read, wget_read_file() either mmap's the file into memory, or
reads the file into the core using read().
If file is named "-", fileno(stdin) is used for reading instead.
If you want to read from a real file named "-", use "./-" instead. */
struct file_memory *
-read_file (const char *file)
+wget_read_file (const char *file)
{
int fd;
struct file_memory *fm;
memory needed to hold the FM structure itself. */
void
-read_file_free (struct file_memory *fm)
+wget_read_file_free (struct file_memory *fm)
{
#ifdef HAVE_MMAP
if (fm->mmap_p)
some detail. */
char *
-human_readable (HR_NUMTYPE n)
+human_readable (HR_NUMTYPE n, const int acc, const int decimals)
{
/* These suffixes are compatible with those of GNU `ls -lh'. */
static char powers[] =
'E', /* exabyte, 2^60 bytes */
};
static char buf[8];
- int i;
+ size_t i;
/* If the quantity is smaller than 1K, just print it. */
if (n < 1024)
if ((n / 1024) < 1024 || i == countof (powers) - 1)
{
double val = n / 1024.0;
- /* Print values smaller than 10 with one decimal digits, and
- others without any decimals. */
+ /* Print values smaller than the accuracy level (acc) with (decimal)
+ * decimal digits, and others without any decimals. */
snprintf (buf, sizeof (buf), "%.*f%c",
- val < 10 ? 1 : 0, val, powers[i]);
+ val < acc ? decimals : 0, val, powers[i]);
return buf;
}
n /= 1024;
ringpos = (ringpos + 1) % RING_SIZE;
return buf;
}
+
+/* Convert the byte count NUM to bits when the --report-bps option is
+   enabled; otherwise return NUM unchanged.  */
+wgint
+convert_to_bits (wgint num)
+{
+  return opt.report_bps ? num * 8 : num;
+}
+
\f
/* Determine the width of the terminal we're running on. If that's
not possible, return 0. */
/* We don't have siglongjmp to preserve the set of blocked signals;
if we longjumped out of the handler at this point, SIGALRM would
remain blocked. We must unblock it manually. */
- int mask = siggetmask ();
- mask &= ~sigmask (SIGALRM);
- sigsetmask (mask);
+ sigset_t set;
+ sigemptyset (&set);
+ sigaddset (&set, SIGALRM);
+ sigprocmask (SIG_BLOCK, &set, NULL);
/* Now it's safe to longjump. */
longjmp (run_with_timeout_env, -1);
This implementation does not emit newlines after 76 characters of
base64 data. */
-int
-base64_encode (const void *data, int length, char *dest)
+size_t
+base64_encode (const void *data, size_t length, char *dest)
{
/* Conversion table. */
static const char tbl[64] = {
This function originates from Free Recode. */
-int
+ssize_t
base64_decode (const char *base64, void *dest)
{
/* Table of base64 values for first 128 characters. Note that this
return q - (char *) dest;
}
+#ifdef HAVE_LIBPCRE
+/* Compile STR as a PCRE pattern.  Returns the compiled pattern, or
+   NULL after printing a diagnostic to stderr when pcre_compile
+   rejects STR.  */
+void *
+compile_pcre_regex (const char *str)
+{
+  const char *errbuf;
+  int erroffset;
+  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
+  if (! regex)
+    {
+      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
+               quote (str), errbuf);
+      /* This function returns a pointer, so failure is reported with
+         NULL rather than the boolean `false'.  */
+      return NULL;
+    }
+  return regex;
+}
+#endif
+
+/* Compile STR as a POSIX extended regular expression without
+   sub-expression reporting (REG_EXTENDED | REG_NOSUB).  Returns a
+   regex_t allocated with xmalloc, or NULL after printing a
+   diagnostic to stderr when regcomp rejects STR.  */
+void *
+compile_posix_regex (const char *str)
+{
+  regex_t *regex = xmalloc (sizeof (regex_t));
+  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
+  if (errcode != 0)
+    {
+      /* A first call with a zero-sized buffer yields the size needed
+         for the message.  */
+      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
+      char *errbuf = xmalloc (errbuf_size);
+      regerror (errcode, regex, errbuf, errbuf_size);
+      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
+               quote (str), errbuf);
+      xfree (errbuf);
+      /* The caller only ever sees NULL on this path, so the regex_t
+         container has to be released here.  */
+      xfree (regex);
+      return NULL;
+    }
+
+  return regex;
+}
+
+#ifdef HAVE_LIBPCRE
+#define OVECCOUNT 30
+/* Match STR against the compiled PCRE pattern REGEX.  Returns true
+   when pcre_exec reports a match; returns false on PCRE_ERROR_NOMATCH
+   and on any other negative result, the latter being logged at
+   LOG_VERBOSE level.  */
+bool
+match_pcre_regex (const void *regex, const char *str)
+{
+  int ovector[OVECCOUNT];
+  size_t len = strlen (str);
+  int rc = pcre_exec ((pcre *) regex, 0, str, (int) len, 0, 0, ovector,
+                      OVECCOUNT);
+  bool matched = (rc != PCRE_ERROR_NOMATCH && rc >= 0);
+
+  /* A plain "no match" is an expected outcome and is not logged.  */
+  if (!matched && rc != PCRE_ERROR_NOMATCH)
+    logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
+               quote (str), rc);
+
+  return matched;
+}
+#undef OVECCOUNT
+#endif
+
+/* Match STR against the compiled POSIX regex REGEX.  Returns true
+   when regexec reports a match and false on REG_NOMATCH.  Any other
+   regexec result is logged at LOG_VERBOSE level together with the
+   regerror description, and false is returned.  */
+bool
+match_posix_regex (const void *regex, const char *str)
+{
+  int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
+  if (rc == REG_NOMATCH)
+    return false;
+  else if (rc == 0)
+    return true;
+  else
+    {
+      /* Describe the failure using the regex that was actually
+         matched; it is not necessarily opt.acceptregex.  */
+      size_t errbuf_size = regerror (rc, regex, NULL, 0);
+      char *errbuf = xmalloc (errbuf_size);
+      regerror (rc, regex, errbuf, errbuf_size);
+      logprintf (LOG_VERBOSE, _("Error while matching %s: %s\n"),
+                 quote (str), errbuf);
+      xfree (errbuf);
+      return false;
+    }
+}
+
#undef IS_ASCII
#undef NEXT_CHAR
\f
return buf;
}
+/* Get the maximum name length for the given path.
+   PATH and LENGTH are handed to strdupdelim as the range
+   [PATH, PATH + LENGTH); a NULL PATH is treated as the empty path.
+   NAME is the pathconf() variable to query.  While pathconf() fails
+   with ENOENT, one trailing directory level is removed and the query
+   is retried, down to the root or the current directory.
+   Return 0 if length is unknown.  */
+long
+get_max_length (const char *path, int length, int name)
+{
+  long ret;
+  int saved_errno;
+  char *p, *d;
+
+  /* Make a copy of the path that we can modify.  */
+  p = path ? strdupdelim (path, path + length) : strdup ("");
+  if (!p)
+    return 0;                   /* no copy to work on: length unknown */
+
+  for (;;)
+    {
+      errno = 0;
+      /* For an empty path query the current directory.  */
+#if HAVE_PATHCONF
+      ret = pathconf (*p ? p : ".", name);
+      if (!(ret < 0 && errno == ENOENT))
+        break;
+#else
+      ret = PATH_MAX;
+#endif
+
+      /* The path does not exist yet, but may be created.  */
+      /* Already at current or root directory, give up.  */
+      if (!*p || strcmp (p, "/") == 0)
+        break;
+
+      /* Remove one directory level and try again.  */
+      d = strrchr (p, '/');
+      if (d == p)
+        p[1] = '\0';            /* check root directory */
+      else if (d)
+        *d = '\0';              /* remove last directory part */
+      else
+        *p = '\0';              /* check current directory */
+    }
+
+  /* Releasing memory may modify errno, so remember what the last
+     query left behind before freeing the copy.  */
+  saved_errno = errno;
+  xfree (p);
+
+  if (ret < 0)
+    {
+      /* pathconf() has a message for us.  */
+      if (saved_errno != 0)
+        {
+          errno = saved_errno;
+          perror ("pathconf");
+        }
+
+      /* If (errno == 0) then there is no max length.
+         Even on error return 0 so the caller can continue.  */
+      return 0;
+    }
+
+  return ret;
+}
+
#ifdef TESTING
const char *
-test_subdir_p()
+test_subdir_p(void)
{
- int i;
- struct {
- char *d1;
- char *d2;
+ static const struct {
+ const char *d1;
+ const char *d2;
bool result;
} test_array[] = {
{ "/somedir", "/somedir", true },
{ "/somedir", "/somedir/d2", true },
{ "/somedir/d1", "/somedir", false },
};
-
- for (i = 0; i < countof(test_array); ++i)
+ unsigned i;
+
+ for (i = 0; i < countof(test_array); ++i)
{
bool res = subdir_p (test_array[i].d1, test_array[i].d2);
- mu_assert ("test_subdir_p: wrong result",
+ mu_assert ("test_subdir_p: wrong result",
res == test_array[i].result);
}
}
const char *
-test_dir_matches_p()
+test_dir_matches_p(void)
{
- int i;
- struct {
- char *dirlist[3];
- char *dir;
+ static struct {
+ const char *dirlist[3];
+ const char *dir;
bool result;
} test_array[] = {
{ { "/somedir", "/someotherdir", NULL }, "somedir", true },
{ { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
{ { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
{ { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
+ { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
+ { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
};
-
- for (i = 0; i < countof(test_array); ++i)
+ unsigned i;
+
+ for (i = 0; i < countof(test_array); ++i)
{
bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
-
- mu_assert ("test_dir_matches_p: wrong result",
+
+ mu_assert ("test_dir_matches_p: wrong result",
res == test_array[i].result);
}