X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Futils.c;h=8168fb38719d8880d08ef53608a0a31afe86720c;hp=d7683075dc3e352cb0d1729f691d3cb0a2d78777;hb=HEAD;hpb=cf3c678c8246fc326b69ae64b4e2766a69df5704 diff --git a/src/utils.c b/src/utils.c index d7683075..8168fb38 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,6 +1,7 @@ /* Various utility functions. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -34,34 +35,36 @@ as that of the covered work. */ #include #include #include -#ifdef HAVE_SYS_TIME_H -# include -#endif -#ifdef HAVE_UNISTD_H -# include -#endif +#include #ifdef HAVE_MMAP # include #endif #ifdef HAVE_PROCESS_H # include /* getpid() */ #endif -#ifdef HAVE_UTIME_H -# include -#endif -#ifdef HAVE_SYS_UTIME_H -# include -#endif #include #include #include #include #include -/* For TIOCGWINSZ and friends: */ -#ifdef HAVE_SYS_IOCTL_H -# include +#if HAVE_UTIME +# include +# ifdef HAVE_UTIME_H +# include +# endif + +# ifdef HAVE_SYS_UTIME_H +# include +# endif #endif + +#include + +#include + +/* For TIOCGWINSZ and friends: */ +#include #ifdef HAVE_TERMIOS_H # include #endif @@ -70,6 +73,11 @@ as that of the covered work. */ #include #include +#include +#ifdef HAVE_LIBPCRE +# include +#endif + #ifndef HAVE_SIGSETJMP /* If sigsetjmp is a macro, configure won't pick it up. */ # ifdef sigsetjmp @@ -476,9 +484,12 @@ fork_to_background (void) /* child: give up the privileges and keep running. 
*/ setsid (); - freopen ("/dev/null", "r", stdin); - freopen ("/dev/null", "w", stdout); - freopen ("/dev/null", "w", stderr); + if (freopen ("/dev/null", "r", stdin) == NULL) + DEBUGP (("Failed to redirect stdin to /dev/null.\n")); + if (freopen ("/dev/null", "w", stdout) == NULL) + DEBUGP (("Failed to redirect stdout to /dev/null.\n")); + if (freopen ("/dev/null", "w", stderr) == NULL) + DEBUGP (("Failed to redirect stderr to /dev/null.\n")); } #endif /* !WINDOWS && !MSDOS */ @@ -492,18 +503,40 @@ fork_to_background (void) void touch (const char *file, time_t tm) { -#ifdef HAVE_STRUCT_UTIMBUF +#if HAVE_UTIME +# ifdef HAVE_STRUCT_UTIMBUF struct utimbuf times; -#else +# else struct { time_t actime; time_t modtime; } times; -#endif +# endif times.modtime = tm; times.actime = time (NULL); if (utime (file, &times) == -1) logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); +#else + struct timespec timespecs[2]; + int fd; + + fd = open (file, O_WRONLY); + if (fd < 0) + { + logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno)); + return; + } + + timespecs[0].tv_sec = time (NULL); + timespecs[0].tv_nsec = 0L; + timespecs[1].tv_sec = tm; + timespecs[1].tv_nsec = 0L; + + if (futimens (fd, timespecs) == -1) + logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno)); + + close (fd); +#endif } /* Checks if FILE is a symbolic link, and removes it if it is. Does @@ -673,7 +706,7 @@ unique_create (const char *name, bool binary, char **opened_name) xfree (uname); uname = unique_name (name, false); } - if (opened_name && fp != NULL) + if (opened_name) { if (fp) *opened_name = uname; @@ -744,8 +777,7 @@ fopen_excl (const char *fname, int binary) open_id = 13; fd = open( fname, /* File name. */ flags, /* Flags. */ - 0777, /* Mode for default protection. -*/ + 0777, /* Mode for default protection. */ "rfm=stmlf", /* Stream_LF. */ OPEN_OPT_ARGS); /* Access callback. 
*/ } @@ -871,12 +903,14 @@ static bool in_acclist (const char *const *, const char *, bool); bool acceptable (const char *s) { - int l = strlen (s); + const char *p; + + if (opt.output_document && strcmp (s, opt.output_document) == 0) + return true; + + if ((p = strrchr (s, '/'))) + s = p + 1; - while (l && s[l] != '/') - --l; - if (s[l] == '/') - s += (l + 1); if (opt.accepts) { if (opt.rejects) @@ -887,6 +921,20 @@ acceptable (const char *s) } else if (opt.rejects) return !in_acclist ((const char *const *)opt.rejects, s, true); + + return true; +} + +/* Determine whether an URL is acceptable to be followed, according to + regex patterns to accept/reject. */ +bool +accept_url (const char *s) +{ + if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s)) + return false; + if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s)) + return false; + return true; } @@ -912,16 +960,16 @@ subdir_p (const char *d1, const char *d2) first element that matches DIR, through wildcards or front comparison (as appropriate). */ static bool -dir_matches_p (char **dirlist, const char *dir) +dir_matches_p (const char **dirlist, const char *dir) { - char **x; + const char **x; int (*matcher) (const char *, const char *, int) = opt.ignore_case ? fnmatch_nocase : fnmatch; for (x = dirlist; *x; x++) { /* Remove leading '/' */ - char *p = *x + (**x == '/'); + const char *p = *x + (**x == '/'); if (has_wildcards_p (p)) { if (matcher (p, dir, FNM_PATHNAME) == 0) @@ -973,29 +1021,15 @@ accdir (const char *directory) bool match_tail (const char *string, const char *tail, bool fold_case) { - int i, j; + int pos = strlen (string) - strlen (tail); - /* We want this to be fast, so we code two loops, one with - case-folding, one without. */ + if (pos < 0) + return false; /* tail is longer than string. 
*/ if (!fold_case) - { - for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) - if (string[i] != tail[j]) - break; - } + return !strcmp (string + pos, tail); else - { - for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) - if (c_tolower (string[i]) != c_tolower (tail[j])) - break; - } - - /* If the tail was exhausted, the match was succesful. */ - if (j == -1) - return true; - else - return false; + return !strcasecmp (string + pos, tail); } /* Checks whether string S matches each element of ACCEPTS. A list @@ -1044,15 +1078,12 @@ in_acclist (const char *const *accepts, const char *s, bool backward) char * suffix (const char *str) { - int i; + char *p; - for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--) - ; + if ((p = strrchr (str, '.')) && !strchr (p + 1, '/')) + return p + 1; - if (str[i++] == '.') - return (char *)str + i; - else - return NULL; + return NULL; } /* Return true if S contains globbing wildcards (`*', `?', `[' or @@ -1061,10 +1092,7 @@ suffix (const char *str) bool has_wildcards_p (const char *s) { - for (; *s; s++) - if (*s == '*' || *s == '?' || *s == '[' || *s == ']') - return true; - return false; + return !!strpbrk (s, "*?[]"); } /* Return true if FNAME ends with a typical HTML suffix. The @@ -1093,56 +1121,6 @@ has_html_suffix_p (const char *fname) return false; } -/* Read a line from FP and return the pointer to freshly allocated - storage. The storage space is obtained through malloc() and should - be freed with free() when it is no longer needed. - - The length of the line is not limited, except by available memory. - The newline character at the end of line is retained. The line is - terminated with a zero character. - - After end-of-file is encountered without anything being read, NULL - is returned. NULL is also returned on error. To distinguish - between these two cases, use the stdio function ferror(). 
*/ - -char * -read_whole_line (FILE *fp) -{ - int length = 0; - int bufsize = 82; - char *line = xmalloc (bufsize); - - while (fgets (line + length, bufsize - length, fp)) - { - length += strlen (line + length); - if (length == 0) - /* Possible for example when reading from a binary file where - a line begins with \0. */ - continue; - - if (line[length - 1] == '\n') - break; - - /* fgets() guarantees to read the whole line, or to use up the - space we've given it. We can double the buffer - unconditionally. */ - bufsize <<= 1; - line = xrealloc (line, bufsize); - } - if (length == 0 || ferror (fp)) - { - xfree (line); - return NULL; - } - if (length + 1 < bufsize) - /* Relieve the memory from our exponential greediness. We say - `length + 1' because the terminating \0 is not included in - LENGTH. We don't need to zero-terminate the string ourselves, - though, because fgets() does that. */ - line = xrealloc (line, length + 1); - return line; -} - /* Read FILE into memory. A pointer to `struct file_memory' are returned; use struct element `content' to access file contents, and the element `length' to know the file length. `content' is *not* @@ -1545,7 +1523,7 @@ with_thousand_seps (wgint n) some detail. */ char * -human_readable (HR_NUMTYPE n) +human_readable (HR_NUMTYPE n, const int acc, const int decimals) { /* These suffixes are compatible with those of GNU `ls -lh'. */ static char powers[] = @@ -1578,10 +1556,10 @@ human_readable (HR_NUMTYPE n) if ((n / 1024) < 1024 || i == countof (powers) - 1) { double val = n / 1024.0; - /* Print values smaller than 10 with one decimal digits, and - others without any decimals. */ + /* Print values smaller than the accuracy level (acc) with (decimal) + * decimal digits, and others without any decimals. */ snprintf (buf, sizeof (buf), "%.*f%c", - val < 10 ? 1 : 0, val, powers[i]); + val < acc ? 
decimals : 0, val, powers[i]); return buf; } n /= 1024; @@ -1798,6 +1776,17 @@ number_to_static_string (wgint number) ringpos = (ringpos + 1) % RING_SIZE; return buf; } + +/* Converts the byte to bits format if --report-bps option is enabled + */ +wgint +convert_to_bits (wgint num) +{ + if (opt.report_bps) + return num * 8; + return num; +} + /* Determine the width of the terminal we're running on. If that's not possible, return 0. */ @@ -1925,9 +1914,10 @@ abort_run_with_timeout (int sig) /* We don't have siglongjmp to preserve the set of blocked signals; if we longjumped out of the handler at this point, SIGALRM would remain blocked. We must unblock it manually. */ - int mask = siggetmask (); - mask &= ~sigmask (SIGALRM); - sigsetmask (mask); + sigset_t set; + sigemptyset (&set); + sigaddset (&set, SIGALRM); + sigprocmask (SIG_BLOCK, &set, NULL); /* Now it's safe to longjump. */ longjmp (run_with_timeout_env, -1); @@ -2115,8 +2105,8 @@ xsleep (double seconds) This implementation does not emit newlines after 76 characters of base64 data. */ -int -base64_encode (const void *data, int length, char *dest) +size_t +base64_encode (const void *data, size_t length, char *dest) { /* Conversion table. */ static const char tbl[64] = { @@ -2183,7 +2173,7 @@ base64_encode (const void *data, int length, char *dest) This function originates from Free Recode. */ -int +ssize_t base64_decode (const char *base64, void *dest) { /* Table of base64 values for first 128 characters. Note that this @@ -2270,6 +2260,89 @@ base64_decode (const char *base64, void *dest) return q - (char *) dest; } +#ifdef HAVE_LIBPCRE +/* Compiles the PCRE regex. */ +void * +compile_pcre_regex (const char *str) +{ + const char *errbuf; + int erroffset; + pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0); + if (! regex) + { + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + return false; + } + return regex; +} +#endif + +/* Compiles the POSIX regex. 
*/ +void * +compile_posix_regex (const char *str) +{ + regex_t *regex = xmalloc (sizeof (regex_t)); + int errcode = regcomp ((regex_t *) regex, str, REG_EXTENDED | REG_NOSUB); + if (errcode != 0) + { + size_t errbuf_size = regerror (errcode, (regex_t *) regex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + regerror (errcode, (regex_t *) regex, errbuf, errbuf_size); + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + xfree (errbuf); + return NULL; + } + + return regex; +} + +#ifdef HAVE_LIBPCRE +#define OVECCOUNT 30 +/* Matches a PCRE regex. */ +bool +match_pcre_regex (const void *regex, const char *str) +{ + size_t l = strlen (str); + int ovector[OVECCOUNT]; + + int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT); + if (rc == PCRE_ERROR_NOMATCH) + return false; + else if (rc < 0) + { + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + return false; + } + else + return true; +} +#undef OVECCOUNT +#endif + +/* Matches a POSIX regex. */ +bool +match_posix_regex (const void *regex, const char *str) +{ + int rc = regexec ((regex_t *) regex, str, 0, NULL, 0); + if (rc == REG_NOMATCH) + return false; + else if (rc == 0) + return true; + else + { + size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + regerror (rc, opt.acceptregex, errbuf, errbuf_size); + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + xfree (errbuf); + return false; + } +} + #undef IS_ASCII #undef NEXT_CHAR @@ -2354,21 +2427,75 @@ print_decimal (double number) return buf; } +/* Get the maximum name length for the given path. */ +/* Return 0 if length is unknown. */ +long +get_max_length (const char *path, int length, int name) +{ + long ret; + char *p, *d; + + /* Make a copy of the path that we can modify. */ + p = path ? 
strdupdelim (path, path + length) : strdup (""); + + for (;;) + { + errno = 0; + /* For an empty path query the current directory. */ +#if HAVE_PATHCONF + ret = pathconf (*p ? p : ".", name); + if (!(ret < 0 && errno == ENOENT)) + break; +#else + ret = PATH_MAX; +#endif + + /* The path does not exist yet, but may be created. */ + /* Already at current or root directory, give up. */ + if (!*p || strcmp (p, "/") == 0) + break; + + /* Remove one directory level and try again. */ + d = strrchr (p, '/'); + if (d == p) + p[1] = '\0'; /* check root directory */ + else if (d) + *d = '\0'; /* remove last directory part */ + else + *p = '\0'; /* check current directory */ + } + + xfree (p); + + if (ret < 0) + { + /* pathconf() has a message for us. */ + if (errno != 0) + perror ("pathconf"); + + /* If (errno == 0) then there is no max length. + Even on error return 0 so the caller can continue. */ + return 0; + } + + return ret; +} + #ifdef TESTING const char * -test_subdir_p() +test_subdir_p(void) { - int i; - struct { - char *d1; - char *d2; + static const struct { + const char *d1; + const char *d2; bool result; } test_array[] = { { "/somedir", "/somedir", true }, { "/somedir", "/somedir/d2", true }, { "/somedir/d1", "/somedir", false }, }; + unsigned i; for (i = 0; i < countof(test_array); ++i) { @@ -2382,12 +2509,11 @@ test_subdir_p() } const char * -test_dir_matches_p() +test_dir_matches_p(void) { - int i; - struct { - char *dirlist[3]; - char *dir; + static struct { + const char *dirlist[3]; + const char *dir; bool result; } test_array[] = { { { "/somedir", "/someotherdir", NULL }, "somedir", true }, @@ -2406,6 +2532,7 @@ test_dir_matches_p() { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false }, { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false }, }; + unsigned i; for (i = 0; i < countof(test_array); ++i) {