X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Futils.c;h=55a8a8d286959dc50aa4d15d8b51a1840c7ca9e7;hp=4188ced7d2ab409097b3d4a60650209255732674;hb=f5a10978710a3e9907fbef31b7df9f414acccb16;hpb=0aa3c5d33c5faa8902fa638c36314deae45460f3 diff --git a/src/utils.c b/src/utils.c index 4188ced7..55a8a8d2 100644 --- a/src/utils.c +++ b/src/utils.c @@ -73,6 +73,11 @@ as that of the covered work. */ #include #include +#include +#ifdef HAVE_LIBPCRE +# include +#endif + #ifndef HAVE_SIGSETJMP /* If sigsetjmp is a macro, configure won't pick it up. */ # ifdef sigsetjmp @@ -917,6 +922,19 @@ acceptable (const char *s) return true; } +/* Determine whether an URL is acceptable to be followed, according to + regex patterns to accept/reject. */ +bool +accept_url (const char *s) +{ + if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s)) + return false; + if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s)) + return false; + + return true; +} + /* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() will return true if and only if D2 begins with `/something/' or is exactly '/something'. */ @@ -2309,6 +2327,89 @@ base64_decode (const char *base64, void *dest) return q - (char *) dest; } +#ifdef HAVE_LIBPCRE +/* Compiles the PCRE regex. */ +void * +compile_pcre_regex (const char *str) +{ + const char *errbuf; + int erroffset; + pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0); + if (! regex) + { + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + return false; + } + return regex; +} +#endif + +/* Compiles the POSIX regex. */ +void * +compile_posix_regex (const char *str) +{ + regex_t *regex = xmalloc (sizeof (regex_t)); + int errcode = regcomp ((regex_t *) regex, str, REG_EXTENDED | REG_NOSUB); + if (errcode != 0) + { + int errbuf_size = regerror (errcode, (regex_t *) regex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + errbuf_size = regerror (errcode, (regex_t *) regex, errbuf, errbuf_size); + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + xfree (errbuf); + return NULL; + } + + return regex; +} + +#ifdef HAVE_LIBPCRE +#define OVECCOUNT 30 +/* Matches a PCRE regex. */ +bool +match_pcre_regex (const void *regex, const char *str) +{ + int l = strlen (str); + int ovector[OVECCOUNT]; + + int rc = pcre_exec ((pcre *) regex, 0, str, l, 0, 0, ovector, OVECCOUNT); + if (rc == PCRE_ERROR_NOMATCH) + return false; + else if (rc < 0) + { + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + return false; + } + else + return true; +} +#undef OVECCOUNT +#endif + +/* Matches a POSIX regex. */ +bool +match_posix_regex (const void *regex, const char *str) +{ + int rc = regexec ((regex_t *) regex, str, 0, NULL, 0); + if (rc == REG_NOMATCH) + return false; + else if (rc == 0) + return true; + else + { + int errbuf_size = regerror (rc, opt.acceptregex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + errbuf_size = regerror (rc, opt.acceptregex, errbuf, errbuf_size); + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + xfree (errbuf); + return false; + } +} + #undef IS_ASCII #undef NEXT_CHAR