X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Futils.c;h=567dc359eeaa94e2a674ae92e70ea3d80bfa4c15;hp=4060aa5079cfaf570add0ec71d58d5b27d75f2e0;hb=b718128b4f3eb8473fb3b31c8397b49854e74ab7;hpb=b9e9ad65ccf6c1fbf989852c5adbb0f840f9c58b diff --git a/src/utils.c b/src/utils.c index 4060aa50..567dc359 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,6 +1,7 @@ /* Various utility functions. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -34,34 +35,36 @@ as that of the covered work. */ #include #include #include -#ifdef HAVE_SYS_TIME_H -# include -#endif -#ifdef HAVE_UNISTD_H -# include -#endif +#include #ifdef HAVE_MMAP # include #endif #ifdef HAVE_PROCESS_H # include /* getpid() */ #endif -#ifdef HAVE_UTIME_H -# include -#endif -#ifdef HAVE_SYS_UTIME_H -# include -#endif #include #include #include #include #include -/* For TIOCGWINSZ and friends: */ -#ifdef HAVE_SYS_IOCTL_H -# include +#if HAVE_UTIME +# include +# ifdef HAVE_UTIME_H +# include +# endif + +# ifdef HAVE_SYS_UTIME_H +# include +# endif #endif + +#include + +#include + +/* For TIOCGWINSZ and friends: */ +#include #ifdef HAVE_TERMIOS_H # include #endif @@ -70,6 +73,11 @@ as that of the covered work. */ #include #include +#include +#ifdef HAVE_LIBPCRE +# include +#endif + #ifndef HAVE_SIGSETJMP /* If sigsetjmp is a macro, configure won't pick it up. */ # ifdef sigsetjmp @@ -90,7 +98,7 @@ as that of the covered work. */ #ifdef TESTING #include "test.h" -#endif +#endif static void memfatal (const char *context, long attempted_size) @@ -138,7 +146,8 @@ memfatal (const char *context, long attempted_size) Vertical bar (|) Characters escaped by "^": - SP ! # % & ' ( ) + , . ; = @ [ ] ^ ` { } ~ + SP ! " # % & ' ( ) + , . : ; = + @ [ \ ] ^ ` { | } ~ Either "^_" or "^ " is accepted as a space. Period (.) 
is a special case. Note that un-escaped < and > can also confuse a directory @@ -172,22 +181,22 @@ unsigned char char_prop[ 256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' ( ) * + , - . / */ - 2, 1, 0, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0, + 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0, /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ - 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 0, 1, 1, 1, 1, 1, + 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1, /* @ A B C D E F G H I J K L M N O */ 1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* P Q R S T U V W X Y Z [ \ ] ^ _ */ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16, /* ` a b c d e f g h i j k l m n o */ 1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, /* p q r s t u v w x y z { | } ~ DEL */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 0, 1, 17, 8, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -271,7 +280,7 @@ sepstring (const char *s) fallback implementation of vsnprintf, this should be portable. */ /* Constant is using for limits memory allocation for text buffer. - Applicable in situation when: vasprintf is not available in the system + Applicable in situation when: vasprintf is not available in the system and vsnprintf return -1 when long line is truncated (in old versions of glibc and in other system where C99 doesn`t support) */ @@ -323,7 +332,7 @@ aprintf (const char *fmt, ...) else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */ { /* maybe we have some wrong format string? 
*/ - logprintf (LOG_ALWAYS, + logprintf (LOG_ALWAYS, _("%s: aprintf: text buffer is too big (%ld bytes), " "aborting.\n"), exec_name, size); /* printout a log message */ @@ -491,18 +500,40 @@ fork_to_background (void) void touch (const char *file, time_t tm) { -#ifdef HAVE_STRUCT_UTIMBUF +#if HAVE_UTIME +# ifdef HAVE_STRUCT_UTIMBUF struct utimbuf times; -#else +# else struct { time_t actime; time_t modtime; } times; -#endif +# endif times.modtime = tm; times.actime = time (NULL); if (utime (file, &times) == -1) logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); +#else + struct timespec timespecs[2]; + int fd; + + fd = open (file, O_WRONLY); + if (fd < 0) + { + logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno)); + return; + } + + timespecs[0].tv_sec = time (NULL); + timespecs[0].tv_nsec = 0L; + timespecs[1].tv_sec = tm; + timespecs[1].tv_nsec = 0L; + + if (futimens (fd, timespecs) == -1) + logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno)); + + close (fd); +#endif } /* Checks if FILE is a symbolic link, and removes it if it is. Does @@ -696,9 +727,9 @@ unique_create (const char *name, bool binary, char **opened_name) If opening the file fails for any reason, including the file having previously existed, this function returns NULL and sets errno appropriately. */ - + FILE * -fopen_excl (const char *fname, bool binary) +fopen_excl (const char *fname, int binary) { int fd; #ifdef O_EXCL @@ -743,8 +774,7 @@ fopen_excl (const char *fname, bool binary) open_id = 13; fd = open( fname, /* File name. */ flags, /* Flags. */ - 0777, /* Mode for default protection. -*/ + 0777, /* Mode for default protection. */ "rfm=stmlf", /* Stream_LF. */ OPEN_OPT_ARGS); /* Access callback. 
*/ } @@ -872,6 +902,9 @@ acceptable (const char *s) { int l = strlen (s); + if (opt.output_document && strcmp (s, opt.output_document) == 0) + return true; + while (l && s[l] != '/') --l; if (s[l] == '/') @@ -889,8 +922,21 @@ acceptable (const char *s) return true; } +/* Return true if the URL S may be followed: S must match opt.acceptregex + when one is set, and must not match opt.rejectregex when one is set. */ +bool +accept_url (const char *s) +{ + if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s)) + return false; + if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s)) + return false; + + return true; +} + /* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() - will return true if and only if D2 begins with `/something/' or is exactly + will return true if and only if D2 begins with `/something/' or is exactly '/something'. */ bool subdir_p (const char *d1, const char *d2) @@ -903,7 +949,7 @@ subdir_p (const char *d1, const char *d2) else for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2) ; - + return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); } @@ -932,7 +978,7 @@ dir_matches_p (char **dirlist, const char *dir) break; } } - + return *x ? true : false; } @@ -1069,7 +1115,7 @@ has_wildcards_p (const char *s) /* Return true if FNAME ends with a typical HTML suffix. The following (case-insensitive) suffixes are presumed to be HTML files: - + html htm ?html (`?' matches one character) @@ -1148,18 +1194,18 @@ read_whole_line (FILE *fp) zero-terminated, and you should *not* read or write beyond the [0, length) range of characters. - After you are done with the file contents, call read_file_free to + After you are done with the file contents, call wget_read_file_free to release the memory. Depending on the operating system and the type of file that is - being read, read_file() either mmap's the file into memory, or + being read, wget_read_file() either mmap's the file into memory, or reads the file into the core using read(). 
If file is named "-", fileno(stdin) is used for reading instead. If you want to read from a real file named "-", use "./-" instead. */ struct file_memory * -read_file (const char *file) +wget_read_file (const char *file) { int fd; struct file_memory *fm; @@ -1269,7 +1315,7 @@ read_file (const char *file) memory needed to hold the FM structure itself. */ void -read_file_free (struct file_memory *fm) +wget_read_file_free (struct file_memory *fm) { #ifdef HAVE_MMAP if (fm->mmap_p) @@ -1797,6 +1843,17 @@ number_to_static_string (wgint number) ringpos = (ringpos + 1) % RING_SIZE; return buf; } + +/* Returns NUM multiplied by 8 (bytes to bits) when opt.report_bps is set; + otherwise returns NUM unchanged. */ +wgint +convert_to_bits (wgint num) +{ + if (opt.report_bps) + return num * 8; + return num; +} + /* Determine the width of the terminal we're running on. If that's not possible, return 0. */ @@ -1924,9 +1981,10 @@ abort_run_with_timeout (int sig) /* We don't have siglongjmp to preserve the set of blocked signals; if we longjumped out of the handler at this point, SIGALRM would remain blocked. We must unblock it manually. */ - int mask = siggetmask (); - mask &= ~sigmask (SIGALRM); - sigsetmask (mask); + sigset_t set; + sigemptyset (&set); + sigaddset (&set, SIGALRM); + sigprocmask (SIG_UNBLOCK, &set, NULL); /* Now it's safe to longjump. */ longjmp (run_with_timeout_env, -1); @@ -2269,6 +2327,89 @@ base64_decode (const char *base64, void *dest) return q - (char *) dest; } +#ifdef HAVE_LIBPCRE +/* Compiles STR as a PCRE regex; returns NULL on failure. */ +void * +compile_pcre_regex (const char *str) +{ + const char *errbuf; + int erroffset; + pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0); + if (! regex) + { + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + return NULL; + } + return regex; +} +#endif + +/* Compiles the POSIX regex. 
*/ +void * +compile_posix_regex (const char *str) +{ + regex_t *regex = xmalloc (sizeof *regex); + int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB); + if (errcode != 0) + { + size_t errbuf_size = regerror (errcode, regex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + regerror (errcode, regex, errbuf, errbuf_size); + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + xfree (errbuf); + xfree (regex); /* Only NULL is returned, so release the buffer here. */ + return NULL; + } + return regex; +} + +#ifdef HAVE_LIBPCRE +#define OVECCOUNT 30 +/* Matches a PCRE regex. */ +bool +match_pcre_regex (const void *regex, const char *str) +{ + int l = strlen (str); + int ovector[OVECCOUNT]; + + int rc = pcre_exec ((pcre *) regex, 0, str, l, 0, 0, ovector, OVECCOUNT); + if (rc == PCRE_ERROR_NOMATCH) + return false; + else if (rc < 0) + { + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + return false; + } + else + return true; +} +#undef OVECCOUNT +#endif + +/* Matches a POSIX regex. 
*/ +bool +match_posix_regex (const void *regex, const char *str) +{ + int rc = regexec ((const regex_t *) regex, str, 0, NULL, 0); + if (rc == REG_NOMATCH) + return false; + else if (rc == 0) + return true; + else + { + size_t errbuf_size = regerror (rc, (const regex_t *) regex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + regerror (rc, (const regex_t *) regex, errbuf, errbuf_size); + logprintf (LOG_VERBOSE, _("Error while matching %s: %s\n"), + quote (str), errbuf); + xfree (errbuf); + return false; + } +} + #undef IS_ASCII #undef NEXT_CHAR @@ -2368,12 +2509,12 @@ test_subdir_p() { "/somedir", "/somedir/d2", true }, { "/somedir/d1", "/somedir", false }, }; - - for (i = 0; i < countof(test_array); ++i) + + for (i = 0; i < countof(test_array); ++i) { bool res = subdir_p (test_array[i].d1, test_array[i].d2); - mu_assert ("test_subdir_p: wrong result", + mu_assert ("test_subdir_p: wrong result", res == test_array[i].result); } @@ -2405,12 +2546,12 @@ test_dir_matches_p() { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false }, { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false }, }; - - for (i = 0; i < countof(test_array); ++i) + + for (i = 0; i < countof(test_array); ++i) { bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); - - mu_assert ("test_dir_matches_p: wrong result", + + mu_assert ("test_dir_matches_p: wrong result", res == test_array[i].result); }