/* Various utility functions.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
#ifdef HAVE_MMAP
# include <sys/mman.h>
#endif
#ifdef HAVE_PROCESS_H
# include <process.h> /* getpid() */
#endif
-#ifdef HAVE_UTIME_H
-# include <utime.h>
-#endif
-#ifdef HAVE_SYS_UTIME_H
-# include <sys/utime.h>
-#endif
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <stdarg.h>
#include <locale.h>
-/* For TIOCGWINSZ and friends: */
-#ifdef HAVE_SYS_IOCTL_H
-# include <sys/ioctl.h>
+#if HAVE_UTIME
+# include <sys/types.h>
+# ifdef HAVE_UTIME_H
+# include <utime.h>
+# endif
+
+# ifdef HAVE_SYS_UTIME_H
+# include <sys/utime.h>
+# endif
#endif
+
+#include <sys/time.h>
+
+#include <sys/stat.h>
+
+/* For TIOCGWINSZ and friends: */
+#include <sys/ioctl.h>
#ifdef HAVE_TERMIOS_H
# include <termios.h>
#endif
#include <signal.h>
#include <setjmp.h>
+#include <regex.h>
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
+
#ifndef HAVE_SIGSETJMP
/* If sigsetjmp is a macro, configure won't pick it up. */
# ifdef sigsetjmp
#ifdef TESTING
#include "test.h"
-#endif
+#endif
static void
memfatal (const char *context, long attempted_size)
Vertical bar (|)
Characters escaped by "^":
- SP ! # % & ' ( ) + , . ; = @ [ ] ^ ` { } ~
+ SP ! " # % & ' ( ) + , . : ; =
+ @ [ \ ] ^ ` { | } ~
Either "^_" or "^ " is accepted as a space. Period (.) is a special
case. Note that un-escaped < and > can also confuse a directory
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* SP ! " # $ % & ' ( ) * + , - . / */
- 2, 1, 0, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
+ 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
- 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 0, 1, 1, 1, 1, 1,
+ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1,
/* @ A B C D E F G H I J K L M N O */
1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 0, 1, 1, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16,
/* ` a b c d e f g h i j k l m n o */
1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* p q r s t u v w x y z { | } ~ DEL */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 0, 1, 17, 8,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
fallback implementation of vsnprintf, this should be portable. */
/* Constant used to limit the memory allocated for the text buffer.
- Applicable in situation when: vasprintf is not available in the system
+ Applicable in situation when: vasprintf is not available in the system
and vsnprintf returns -1 when a long line is truncated (in old versions of
glibc and on other systems that lack C99 support) */
else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */
{ /* maybe we have some wrong
format string? */
- logprintf (LOG_ALWAYS,
+ logprintf (LOG_ALWAYS,
_("%s: aprintf: text buffer is too big (%ld bytes), "
"aborting.\n"),
exec_name, size); /* printout a log message */
void
touch (const char *file, time_t tm)
{
-#ifdef HAVE_STRUCT_UTIMBUF
+#if HAVE_UTIME
+# ifdef HAVE_STRUCT_UTIMBUF
  struct utimbuf times;
-#else
+# else
  struct {
    time_t actime;
    time_t modtime;
  } times;
-#endif
+# endif
  times.modtime = tm;
  times.actime = time (NULL);
  if (utime (file, &times) == -1)
    logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
+#else
+  /* Without utime(), open FILE and set its timestamps through the
+     descriptor.  A failure of either step is logged and otherwise
+     ignored.  */
+  struct timespec timespecs[2];
+  int fd;
+
+  fd = open (file, O_WRONLY);
+  if (fd < 0)
+    {
+      logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
+      return;
+    }
+
+  /* futimens takes the access time in element 0 and the modification
+     time in element 1.  */
+  timespecs[0].tv_sec = time (NULL);
+  timespecs[0].tv_nsec = 0L;
+  timespecs[1].tv_sec = tm;
+  timespecs[1].tv_nsec = 0L;
+
+  if (futimens (fd, timespecs) == -1)
+    logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
+
+  close (fd);
+#endif
}
/* Checks if FILE is a symbolic link, and removes it if it is. Does
If opening the file fails for any reason, including the file having
previously existed, this function returns NULL and sets errno
appropriately. */
-
+
FILE *
-fopen_excl (const char *fname, bool binary)
+fopen_excl (const char *fname, int binary)
{
int fd;
#ifdef O_EXCL
open_id = 13;
fd = open( fname, /* File name. */
flags, /* Flags. */
- 0777, /* Mode for default protection.
-*/
+ 0777, /* Mode for default protection. */
"rfm=stmlf", /* Stream_LF. */
OPEN_OPT_ARGS); /* Access callback. */
}
{
int l = strlen (s);
+ if (opt.output_document && strcmp (s, opt.output_document) == 0)
+ return true;
+
while (l && s[l] != '/')
--l;
if (s[l] == '/')
return true;
}
+/* Decide whether the URL S may be followed, based on the accept and
+   reject regex patterns held in opt.  A pattern that is not set
+   places no restriction on S.  */
+bool
+accept_url (const char *s)
+{
+  /* When an accept pattern is set, S has to match it.  */
+  bool passes_accept = !opt.acceptregex
+                       || opt.regex_match_fun (opt.acceptregex, s);
+  if (!passes_accept)
+    return false;
+
+  /* When a reject pattern is set, S must not match it.  */
+  return !(opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s));
+}
+
/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
- will return true if and only if D2 begins with `/something/' or is exactly
+ will return true if and only if D2 begins with `/something/' or is exactly
'/something'. */
bool
subdir_p (const char *d1, const char *d2)
else
for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
;
-
+
return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
}
break;
}
}
-
+
return *x ? true : false;
}
/* Return true if FNAME ends with a typical HTML suffix. The
following (case-insensitive) suffixes are presumed to be HTML
files:
-
+
html
htm
?html (`?' matches one character)
zero-terminated, and you should *not* read or write beyond the [0,
length) range of characters.
- After you are done with the file contents, call read_file_free to
+ After you are done with the file contents, call wget_read_file_free to
release the memory.
Depending on the operating system and the type of file that is
- being read, read_file() either mmap's the file into memory, or
+ being read, wget_read_file() either mmap's the file into memory, or
reads the file into the core using read().
If file is named "-", fileno(stdin) is used for reading instead.
If you want to read from a real file named "-", use "./-" instead. */
struct file_memory *
-read_file (const char *file)
+wget_read_file (const char *file)
{
int fd;
struct file_memory *fm;
memory needed to hold the FM structure itself. */
void
-read_file_free (struct file_memory *fm)
+wget_read_file_free (struct file_memory *fm)
{
#ifdef HAVE_MMAP
if (fm->mmap_p)
ringpos = (ringpos + 1) % RING_SIZE;
return buf;
}
+
+/* Return NUM multiplied by 8 (a byte count expressed in bits) when
+   opt.bits_fmt is set, i.e. when the --bits option is enabled;
+   otherwise return NUM unchanged.  */
+wgint
+convert_to_bits (wgint num)
+{
+  return opt.bits_fmt ? num * 8 : num;
+}
+
\f
/* Determine the width of the terminal we're running on. If that's
not possible, return 0. */
/* We don't have siglongjmp to preserve the set of blocked signals;
if we longjumped out of the handler at this point, SIGALRM would
remain blocked. We must unblock it manually. */
- int mask = siggetmask ();
- mask &= ~sigmask (SIGALRM);
- sigsetmask (mask);
+ sigset_t set;
+ sigemptyset (&set);
+ sigaddset (&set, SIGALRM);
+ sigprocmask (SIG_BLOCK, &set, NULL);
/* Now it's safe to longjump. */
longjmp (run_with_timeout_env, -1);
return q - (char *) dest;
}
+#ifdef HAVE_LIBPCRE
+/* Compile STR as a PCRE regular expression.
+
+   Returns the compiled pattern on success.  On failure, prints the
+   error text reported by pcre_compile to stderr and returns NULL.  */
+void *
+compile_pcre_regex (const char *str)
+{
+  const char *errbuf;
+  int erroffset;
+  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
+  if (! regex)
+    {
+      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
+               quote (str), errbuf);
+      /* The return type is a pointer, so report failure with NULL
+         rather than the boolean false.  */
+      return NULL;
+    }
+  return regex;
+}
+#endif
+
+/* Compile STR as a POSIX extended regular expression (REG_EXTENDED,
+   with REG_NOSUB since no sub-match positions are requested).
+
+   Returns a newly allocated regex_t on success.  On failure, prints
+   the regerror message to stderr, releases the allocation and returns
+   NULL.  */
+void *
+compile_posix_regex (const char *str)
+{
+  regex_t *regex = xmalloc (sizeof (regex_t));
+  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
+  if (errcode != 0)
+    {
+      /* A first call with a zero-sized buffer yields the size needed
+         to hold the whole message.  */
+      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
+      char *errbuf = xmalloc (errbuf_size);
+      regerror (errcode, regex, errbuf, errbuf_size);
+      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
+               quote (str), errbuf);
+      xfree (errbuf);
+      /* REGEX is not handed to the caller on this path, so free it
+         here instead of leaking it.  */
+      xfree (regex);
+      return NULL;
+    }
+
+  return regex;
+}
+
+#ifdef HAVE_LIBPCRE
+#define OVECCOUNT 30
+/* Run the compiled PCRE pattern REGEX against STR.
+
+   Returns true when pcre_exec reports a non-negative result, and
+   false when it reports PCRE_ERROR_NOMATCH.  Any other negative
+   result is logged at LOG_VERBOSE and also yields false.  */
+bool
+match_pcre_regex (const void *regex, const char *str)
+{
+  int ovector[OVECCOUNT];
+  int len = strlen (str);
+  int status = pcre_exec ((pcre *) regex, 0, str, len, 0, 0,
+                          ovector, OVECCOUNT);
+  bool matched = false;
+
+  if (status != PCRE_ERROR_NOMATCH)
+    {
+      if (status < 0)
+        logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
+                   quote (str), status);
+      else
+        matched = true;
+    }
+
+  return matched;
+}
+#undef OVECCOUNT
+#endif
+
+/* Run the compiled POSIX pattern REGEX against STR.
+
+   Returns true when regexec reports a match and false when it reports
+   REG_NOMATCH.  Any other regexec result is logged at LOG_VERBOSE and
+   treated as "no match".  */
+bool
+match_posix_regex (const void *regex, const char *str)
+{
+  const regex_t *preg = regex;
+  int rc = regexec (preg, str, 0, NULL, 0);
+  if (rc == REG_NOMATCH)
+    return false;
+  else if (rc == 0)
+    return true;
+  else
+    {
+      /* Query regerror with the pattern that was actually executed
+         rather than assuming it is opt.acceptregex.  A first call
+         with a zero-sized buffer yields the size needed.  */
+      size_t errbuf_size = regerror (rc, preg, NULL, 0);
+      char *errbuf = xmalloc (errbuf_size);
+      regerror (rc, preg, errbuf, errbuf_size);
+      /* NOTE(review): the log message prints only the numeric code;
+         the text fetched into ERRBUF is not shown.  */
+      logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
+                 quote (str), rc);
+      xfree (errbuf);
+      return false;
+    }
+}
+
#undef IS_ASCII
#undef NEXT_CHAR
\f
{ "/somedir", "/somedir/d2", true },
{ "/somedir/d1", "/somedir", false },
};
-
- for (i = 0; i < countof(test_array); ++i)
+
+ for (i = 0; i < countof(test_array); ++i)
{
bool res = subdir_p (test_array[i].d1, test_array[i].d2);
- mu_assert ("test_subdir_p: wrong result",
+ mu_assert ("test_subdir_p: wrong result",
res == test_array[i].result);
}
{ { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
{ { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
};
-
- for (i = 0; i < countof(test_array); ++i)
+
+ for (i = 0; i < countof(test_array); ++i)
{
bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
-
- mu_assert ("test_dir_matches_p: wrong result",
+
+ mu_assert ("test_dir_matches_p: wrong result",
res == test_array[i].result);
}