/* Various utility functions.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
#ifdef HAVE_MMAP
# include <sys/mman.h>
#endif
#ifdef HAVE_PROCESS_H
# include <process.h> /* getpid() */
#endif
-#ifdef HAVE_UTIME_H
-# include <utime.h>
-#endif
-#ifdef HAVE_SYS_UTIME_H
-# include <sys/utime.h>
-#endif
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <stdarg.h>
#include <locale.h>
-/* For TIOCGWINSZ and friends: */
-#ifdef HAVE_SYS_IOCTL_H
-# include <sys/ioctl.h>
+#if HAVE_UTIME
+# include <sys/types.h>
+# ifdef HAVE_UTIME_H
+# include <utime.h>
+# endif
+
+# ifdef HAVE_SYS_UTIME_H
+# include <sys/utime.h>
+# endif
#endif
+
+#include <sys/time.h>
+
+#include <sys/stat.h>
+
+/* For TIOCGWINSZ and friends: */
+#include <sys/ioctl.h>
#ifdef HAVE_TERMIOS_H
# include <termios.h>
#endif
#include <signal.h>
#include <setjmp.h>
+#include <regex.h>
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
+
#ifndef HAVE_SIGSETJMP
/* If sigsetjmp is a macro, configure won't pick it up. */
# ifdef sigsetjmp
Vertical bar (|)
Characters escaped by "^":
- SP ! # % & ' ( ) + , . ; = @ [ ] ^ ` { } ~
+ SP ! " # % & ' ( ) + , . : ; =
+ @ [ \ ] ^ ` { | } ~
Either "^_" or "^ " is accepted as a space. Period (.) is a special
case. Note that un-escaped < and > can also confuse a directory
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* SP ! " # $ % & ' ( ) * + , - . / */
- 2, 1, 0, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
+ 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
- 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 0, 1, 1, 1, 1, 1,
+ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1,
/* @ A B C D E F G H I J K L M N O */
1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 0, 1, 1, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16,
/* ` a b c d e f g h i j k l m n o */
1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* p q r s t u v w x y z { | } ~ DEL */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 0, 1, 17, 8,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* Set the access time of FILE to the current time and its modification
   time to TM.  Failures are reported through logprintf and are
   otherwise ignored.  */
void
touch (const char *file, time_t tm)
{
#if HAVE_UTIME
# ifdef HAVE_STRUCT_UTIMBUF
  struct utimbuf times;
# else
  struct {
    time_t actime;
    time_t modtime;
  } times;
# endif
  times.modtime = tm;
  times.actime = time (NULL);
  if (utime (file, &times) == -1)
    logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
#else
  struct timespec timespecs[2];
  int fd;

  /* futimens operates on a descriptor, so FILE has to be opened first.  */
  fd = open (file, O_WRONLY);
  if (fd < 0)
    {
      logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
      return;
    }

  /* Element 0 is the access time, element 1 the modification time.  */
  timespecs[0].tv_sec = time (NULL);
  timespecs[0].tv_nsec = 0L;
  timespecs[1].tv_sec = tm;
  timespecs[1].tv_nsec = 0L;

  if (futimens (fd, timespecs) == -1)
    logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));

  close (fd);
#endif
}
/* Checks if FILE is a symbolic link, and removes it if it is. Does
xfree (uname);
uname = unique_name (name, false);
}
- if (opened_name && fp != NULL)
+ if (opened_name)
{
if (fp)
*opened_name = uname;
open_id = 13;
fd = open( fname, /* File name. */
flags, /* Flags. */
- 0777, /* Mode for default protection.
-*/
+ 0777, /* Mode for default protection. */
"rfm=stmlf", /* Stream_LF. */
OPEN_OPT_ARGS); /* Access callback. */
}
bool
acceptable (const char *s)
{
- int l = strlen (s);
+ const char *p;
+
+ if (opt.output_document && strcmp (s, opt.output_document) == 0)
+ return true;
+
+ if ((p = strrchr (s, '/')))
+ s = p + 1;
- while (l && s[l] != '/')
- --l;
- if (s[l] == '/')
- s += (l + 1);
if (opt.accepts)
{
if (opt.rejects)
}
else if (opt.rejects)
return !in_acclist ((const char *const *)opt.rejects, s, true);
+
+ return true;
+}
+
+/* Determine whether an URL is acceptable to be followed, according to
+ regex patterns to accept/reject. */
+bool
+accept_url (const char *s)
+{
+ if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
+ return false;
+ if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
+ return false;
+
return true;
}
/* Return true if STRING ends with TAIL, false otherwise.  When
   FOLD_CASE is true the comparison ignores letter case.  An empty TAIL
   matches every STRING.  */
bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);

  if (tail_len > string_len)
    return false;               /* tail is longer than string. */

  /* Advance to the part of STRING that has to equal TAIL.  */
  string += string_len - tail_len;

  /* strcmp and strcasecmp return 0 on equality, so the result must be
     compared against 0 to obtain the boolean "matches" answer.  */
  if (!fold_case)
    return strcmp (string, tail) == 0;
  else
    return strcasecmp (string, tail) == 0;
}
/* Checks whether string S matches each element of ACCEPTS. A list
/* Return a pointer to the suffix of STR, i.e. the text following its
   last '.'.  NULL is returned when STR contains no '.', or when a '/'
   appears after the last '.'.  The result points into STR itself.  */
char *
suffix (const char *str)
{
  char *dot = strrchr (str, '.');

  if (dot == NULL)
    return NULL;

  /* A slash after the dot means the dot belongs to a directory part.  */
  if (strchr (dot + 1, '/') != NULL)
    return NULL;

  return dot + 1;
}
/* Return true if S contains globbing wildcards (`*', `?', `[' or
/* Return true if S contains at least one of the characters '*', '?',
   '[' or ']', false otherwise.  */
bool
has_wildcards_p (const char *s)
{
  for (; *s != '\0'; s++)
    {
      switch (*s)
        {
        case '*':
        case '?':
        case '[':
        case ']':
          return true;
        default:
          break;
        }
    }
  return false;
}
/* Return true if FNAME ends with a typical HTML suffix. The
return false;
}
-/* Read a line from FP and return the pointer to freshly allocated
- storage. The storage space is obtained through malloc() and should
- be freed with free() when it is no longer needed.
-
- The length of the line is not limited, except by available memory.
- The newline character at the end of line is retained. The line is
- terminated with a zero character.
-
- After end-of-file is encountered without anything being read, NULL
- is returned. NULL is also returned on error. To distinguish
- between these two cases, use the stdio function ferror(). */
-
-char *
-read_whole_line (FILE *fp)
-{
- int length = 0;
- int bufsize = 82;
- char *line = xmalloc (bufsize);
-
- while (fgets (line + length, bufsize - length, fp))
- {
- length += strlen (line + length);
- if (length == 0)
- /* Possible for example when reading from a binary file where
- a line begins with \0. */
- continue;
-
- if (line[length - 1] == '\n')
- break;
-
- /* fgets() guarantees to read the whole line, or to use up the
- space we've given it. We can double the buffer
- unconditionally. */
- bufsize <<= 1;
- line = xrealloc (line, bufsize);
- }
- if (length == 0 || ferror (fp))
- {
- xfree (line);
- return NULL;
- }
- if (length + 1 < bufsize)
- /* Relieve the memory from our exponential greediness. We say
- `length + 1' because the terminating \0 is not included in
- LENGTH. We don't need to zero-terminate the string ourselves,
- though, because fgets() does that. */
- line = xrealloc (line, length + 1);
- return line;
-}
-\f
/* Read FILE into memory. A pointer to `struct file_memory' is
returned; use struct element `content' to access file contents, and
the element `length' to know the file length. `content' is *not*
zero-terminated, and you should *not* read or write beyond the [0,
length) range of characters.
- After you are done with the file contents, call read_file_free to
+ After you are done with the file contents, call wget_read_file_free to
release the memory.
Depending on the operating system and the type of file that is
- being read, read_file() either mmap's the file into memory, or
+ being read, wget_read_file() either mmap's the file into memory, or
reads the file into the core using read().
If file is named "-", fileno(stdin) is used for reading instead.
If you want to read from a real file named "-", use "./-" instead. */
struct file_memory *
-read_file (const char *file)
+wget_read_file (const char *file)
{
int fd;
struct file_memory *fm;
memory needed to hold the FM structure itself. */
void
-read_file_free (struct file_memory *fm)
+wget_read_file_free (struct file_memory *fm)
{
#ifdef HAVE_MMAP
if (fm->mmap_p)
ringpos = (ringpos + 1) % RING_SIZE;
return buf;
}
+
+/* Converts the byte to bits format if --report-bps option is enabled
+ */
+wgint
+convert_to_bits (wgint num)
+{
+ if (opt.report_bps)
+ return num * 8;
+ return num;
+}
+
\f
/* Determine the width of the terminal we're running on. If that's
not possible, return 0. */
/* We don't have siglongjmp to preserve the set of blocked signals;
if we longjumped out of the handler at this point, SIGALRM would
remain blocked. We must unblock it manually. */
- int mask = siggetmask ();
- mask &= ~sigmask (SIGALRM);
- sigsetmask (mask);
+ sigset_t set;
+ sigemptyset (&set);
+ sigaddset (&set, SIGALRM);
+ sigprocmask (SIG_BLOCK, &set, NULL);
/* Now it's safe to longjump. */
longjmp (run_with_timeout_env, -1);
return q - (char *) dest;
}
#ifdef HAVE_LIBPCRE
/* Compile STR as a PCRE pattern.  Returns the compiled pattern, or
   NULL after printing a diagnostic to stderr when compilation fails.  */
void *
compile_pcre_regex (const char *str)
{
  const char *errbuf;
  int erroffset;
  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
  if (! regex)
    {
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      /* The function returns a pointer, so report failure with NULL
         rather than the boolean constant false.  */
      return NULL;
    }
  return regex;
}
#endif
+
/* Compile STR as a POSIX extended regular expression (REG_EXTENDED,
   REG_NOSUB).  Returns a freshly allocated regex_t on success.  On
   failure a diagnostic is printed to stderr, the allocation is
   released and NULL is returned.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *regex = xmalloc (sizeof *regex);
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* A first call with a zero-sized buffer yields the size needed
         for the message, including the terminating null byte.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* Nothing is handed back to the caller, so free the regex_t
         storage here to avoid leaking it.  */
      xfree (regex);
      return NULL;
    }

  return regex;
}
+
#ifdef HAVE_LIBPCRE
#define OVECCOUNT 30
/* Run the compiled PCRE pattern REGEX against STR.  Returns true when
   pcre_exec reports a match.  Returns false when there is no match or
   when pcre_exec fails; a failure other than "no match" is logged at
   LOG_VERBOSE level.  */
bool
match_pcre_regex (const void *regex, const char *str)
{
  int ovector[OVECCOUNT];
  int len = strlen (str);
  int status = pcre_exec ((pcre *) regex, 0, str, len, 0, 0,
                          ovector, OVECCOUNT);

  if (status == PCRE_ERROR_NOMATCH)
    return false;
  if (status >= 0)
    return true;

  /* Any remaining negative value is an execution error.  */
  logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
             quote (str), status);
  return false;
}
#undef OVECCOUNT
#endif
+
+/* Matches a POSIX regex. */
+bool
+match_posix_regex (const void *regex, const char *str)
+{
+ int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
+ if (rc == REG_NOMATCH)
+ return false;
+ else if (rc == 0)
+ return true;
+ else
+ {
+ int errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
+ char *errbuf = xmalloc (errbuf_size);
+ regerror (rc, opt.acceptregex, errbuf, errbuf_size);
+ logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
+ quote (str), rc);
+ xfree (errbuf);
+ return false;
+ }
+}
+
#undef IS_ASCII
#undef NEXT_CHAR
\f
return buf;
}
+/* Get the maximum name length for the given path. */
+/* Return 0 if length is unknown. */
+size_t
+get_max_length (const char *path, int length, int name)
+{
+ long ret;
+ char *p, *d;
+
+ /* Make a copy of the path that we can modify. */
+ p = path ? strdupdelim (path, path + length) : strdup ("");
+
+ for (;;)
+ {
+ errno = 0;
+ /* For an empty path query the current directory. */
+#if HAVE_PATHCONF
+ ret = pathconf (*p ? p : ".", name);
+ if (!(ret < 0 && errno == ENOENT))
+ break;
+#else
+ ret = PATH_MAX;
+#endif
+
+ /* The path does not exist yet, but may be created. */
+ /* Already at current or root directory, give up. */
+ if (!*p || strcmp (p, "/") == 0)
+ break;
+
+ /* Remove one directory level and try again. */
+ d = strrchr (p, '/');
+ if (d == p)
+ p[1] = '\0'; /* check root directory */
+ else if (d)
+ *d = '\0'; /* remove last directory part */
+ else
+ *p = '\0'; /* check current directory */
+ }
+
+ xfree (p);
+
+ if (ret < 0)
+ {
+ /* pathconf() has a message for us. */
+ if (errno != 0)
+ perror ("pathconf");
+
+ /* If (errno == 0) then there is no max length.
+ Even on error return 0 so the caller can continue. */
+ return 0;
+ }
+
+ return ret;
+}
+
#ifdef TESTING
const char *
test_subdir_p()
{
- int i;
- struct {
- char *d1;
- char *d2;
+ static struct {
+ const char *d1;
+ const char *d2;
bool result;
} test_array[] = {
{ "/somedir", "/somedir", true },
{ "/somedir", "/somedir/d2", true },
{ "/somedir/d1", "/somedir", false },
};
+ unsigned i;
for (i = 0; i < countof(test_array); ++i)
{
const char *
test_dir_matches_p()
{
- int i;
- struct {
- char *dirlist[3];
- char *dir;
+ static struct {
+ const char *dirlist[3];
+ const char *dir;
bool result;
} test_array[] = {
{ { "/somedir", "/someotherdir", NULL }, "somedir", true },
{ { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
{ { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
};
+ unsigned i;
for (i = 0; i < countof(test_array); ++i)
{