X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=8168fb38719d8880d08ef53608a0a31afe86720c;hb=HEAD;hp=2c6fd784570ed187ed54f424cd743716d06443a4;hpb=1365950c01a87e8ede70da4d3937d379e3050d13;p=wget diff --git a/src/utils.c b/src/utils.c index 2c6fd784..8168fb38 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,12 +1,13 @@ -/* Various functions of utilitarian nature. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 - Free Software Foundation, Inc. +/* Various utility functions. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -15,291 +16,198 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +along with Wget. If not, see . -#include +Additional permission under GNU GPL version 3 section 7 + +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. 
*/ + +#include "wget.h" #include #include -#ifdef HAVE_STRING_H -# include -#else /* not HAVE_STRING_H */ -# include -#endif /* not HAVE_STRING_H */ -#include -#ifdef HAVE_UNISTD_H -# include -#endif +#include +#include +#include #ifdef HAVE_MMAP # include #endif -#ifdef HAVE_PWD_H -# include -#endif -#include -#ifdef HAVE_UTIME_H -# include -#endif -#ifdef HAVE_SYS_UTIME_H -# include +#ifdef HAVE_PROCESS_H +# include /* getpid() */ #endif #include -#ifdef NeXT -# include /* for access() */ -#endif #include #include +#include +#include -/* For TIOCGWINSZ and friends: */ -#ifdef HAVE_SYS_IOCTL_H -# include -#endif -#ifdef HAVE_TERMIOS_H -# include -#endif - -#include "wget.h" -#include "utils.h" -#include "fnmatch.h" -#include "hash.h" +#if HAVE_UTIME +# include +# ifdef HAVE_UTIME_H +# include +# endif -#ifndef errno -extern int errno; +# ifdef HAVE_SYS_UTIME_H +# include +# endif #endif -/* This section implements several wrappers around the basic - allocation routines. This is done for two reasons: first, so that - the callers of these functions need not consistently check for - errors. If there is not enough virtual memory for running Wget, - something is seriously wrong, and Wget exits with an appropriate - error message. +#include - The second reason why these are useful is that, if DEBUG_MALLOC is - defined, they also provide a handy (if crude) malloc debugging - interface that checks memory leaks. */ +#include -/* Croak the fatal memory error and bail out with non-zero exit - status. */ -static void -memfatal (const char *what) -{ - /* Make sure we don't try to store part of the log line, and thus - call malloc. */ - log_set_save_context (0); - logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what); - exit (1); -} +/* For TIOCGWINSZ and friends: */ +#include +#ifdef HAVE_TERMIOS_H +# include +#endif -/* These functions end with _real because they need to be - distinguished from the debugging functions, and from the macros. 
- Explanation follows: +/* Needed for Unix version of run_with_timeout. */ +#include +#include - If memory debugging is not turned on, wget.h defines these: +#include +#ifdef HAVE_LIBPCRE +# include +#endif - #define xmalloc xmalloc_real - #define xrealloc xrealloc_real - #define xstrdup xstrdup_real - #define xfree free +#ifndef HAVE_SIGSETJMP +/* If sigsetjmp is a macro, configure won't pick it up. */ +# ifdef sigsetjmp +# define HAVE_SIGSETJMP +# endif +#endif - In case of memory debugging, the definitions are a bit more - complex, because we want to provide more information, *and* we want - to call the debugging code. (The former is the reason why xmalloc - and friends need to be macros in the first place.) Then it looks - like this: +#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK +# define USE_SIGNAL_TIMEOUT +#endif - #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__) - #define xfree(a) xfree_debug (a, __FILE__, __LINE__) - #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__) - #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__) +#include "utils.h" +#include "hash.h" - Each of the *_debug function does its magic and calls the real one. */ +#ifdef __VMS +#include "vms.h" +#endif /* def __VMS */ -#ifdef DEBUG_MALLOC -# define STATIC_IF_DEBUG static -#else -# define STATIC_IF_DEBUG +#ifdef TESTING +#include "test.h" #endif -STATIC_IF_DEBUG void * -xmalloc_real (size_t size) -{ - void *ptr = malloc (size); - if (!ptr) - memfatal ("malloc"); - return ptr; -} - -STATIC_IF_DEBUG void * -xrealloc_real (void *ptr, size_t newsize) +static void +memfatal (const char *context, long attempted_size) { - void *newptr; + /* Make sure we don't try to store part of the log line, and thus + call malloc. */ + log_set_save_context (false); - /* Not all Un*xes have the feature of realloc() that calling it with - a NULL-pointer is the same as malloc(), but it is easy to - simulate. 
*/ - if (ptr) - newptr = realloc (ptr, newsize); + /* We have different log outputs in different situations: + 1) output without bytes information + 2) output with bytes information */ + if (attempted_size == UNKNOWN_ATTEMPTED_SIZE) + { + logprintf (LOG_ALWAYS, + _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"), + exec_name, context); + } else - newptr = malloc (newsize); - if (!newptr) - memfatal ("realloc"); - return newptr; -} - -STATIC_IF_DEBUG char * -xstrdup_real (const char *s) -{ - char *copy; - -#ifndef HAVE_STRDUP - int l = strlen (s); - copy = malloc (l + 1); - if (!copy) - memfatal ("strdup"); - memcpy (copy, s, l + 1); -#else /* HAVE_STRDUP */ - copy = strdup (s); - if (!copy) - memfatal ("strdup"); -#endif /* HAVE_STRDUP */ + { + logprintf (LOG_ALWAYS, + _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"), + exec_name, context, attempted_size); + } - return copy; + exit (1); } -#ifdef DEBUG_MALLOC +/* Character property table for (re-)escaping VMS ODS5 extended file + names. Note that this table ignores Unicode. -/* Crude home-grown routines for debugging some malloc-related - problems. Featured: + ODS2 valid characters: 0-9 A-Z a-z $ - _ ~ - * Counting the number of malloc and free invocations, and reporting - the "balance", i.e. how many times more malloc was called than it - was the case with free. + ODS5 Invalid characters: + C0 control codes (0x00 to 0x1F inclusive) + Asterisk (*) + Question mark (?) - * Making malloc store its entry into a simple array and free remove - stuff from that array. At the end, print the pointers which have - not been freed, along with the source file and the line number. - This also has the side-effect of detecting freeing memory that - was never allocated. 
+ ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?): + Double quotation marks (") + Backslash (\) + Colon (:) + Left angle bracket (<) + Right angle bracket (>) + Slash (/) + Vertical bar (|) - Note that this kind of memory leak checking strongly depends on - every malloc() being followed by a free(), even if the program is - about to finish. Wget is careful to free the data structure it - allocated in init.c. */ + Characters escaped by "^": + SP ! " # % & ' ( ) + , . : ; = + @ [ \ ] ^ ` { | } ~ -static int malloc_count, free_count; + Either "^_" or "^ " is accepted as a space. Period (.) is a special + case. Note that un-escaped < and > can also confuse a directory + spec. -static struct { - char *ptr; - const char *file; - int line; -} malloc_debug[100000]; + Characters put out as ^xx: + 7F (DEL) + 80-9F (C1 control characters) + A0 (nonbreaking space) + FF (Latin small letter y diaeresis) -/* Both register_ptr and unregister_ptr take O(n) operations to run, - which can be a real problem. It would be nice to use a hash table - for malloc_debug, but the functions in hash.c are not suitable - because they can call malloc() themselves. Maybe it would work if - the hash table were preallocated to a huge size, and if we set the - rehash threshold to 1.0. */ + Other cases: + Unicode: "^Uxxxx", where "xxxx" is four hex digits. -/* Register PTR in malloc_debug. Abort if this is not possible - (presumably due to the number of current allocations exceeding the - size of malloc_debug.) 
*/ + Property table values: + Normal escape: 1 + Space: 2 + Dot: 4 + Hex-hex escape: 8 + ODS2 normal: 16 + ODS2 lower case: 32 + Hex digit: 64 +*/ -static void -register_ptr (void *ptr, const char *file, int line) -{ - int i; - for (i = 0; i < ARRAY_SIZE (malloc_debug); i++) - if (malloc_debug[i].ptr == NULL) - { - malloc_debug[i].ptr = ptr; - malloc_debug[i].file = file; - malloc_debug[i].line = line; - return; - } - abort (); -} +unsigned char char_prop[ 256] = { -/* Unregister PTR from malloc_debug. Abort if PTR is not present in - malloc_debug. (This catches calling free() with a bogus pointer.) */ +/* NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -static void -unregister_ptr (void *ptr) -{ - int i; - for (i = 0; i < ARRAY_SIZE (malloc_debug); i++) - if (malloc_debug[i].ptr == ptr) - { - malloc_debug[i].ptr = NULL; - return; - } - abort (); -} +/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* Print the malloc debug stats that can be gathered from the above - information. Currently this is the count of mallocs, frees, the - difference between the two, and the dump of the contents of - malloc_debug. The last part are the memory leaks. */ +/* SP ! " # $ % & ' ( ) * + , - . / */ + 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0, -void -print_malloc_debug_stats (void) -{ - int i; - printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n", - malloc_count, free_count, malloc_count - free_count); - for (i = 0; i < ARRAY_SIZE (malloc_debug); i++) - if (malloc_debug[i].ptr != NULL) - printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr, - malloc_debug[i].file, malloc_debug[i].line); -} +/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
*/ + 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1, -void * -xmalloc_debug (size_t size, const char *source_file, int source_line) -{ - void *ptr = xmalloc_real (size); - ++malloc_count; - register_ptr (ptr, source_file, source_line); - return ptr; -} +/* @ A B C D E F G H I J K L M N O */ + 1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16, -void -xfree_debug (void *ptr, const char *source_file, int source_line) -{ - assert (ptr != NULL); - ++free_count; - unregister_ptr (ptr); - free (ptr); -} +/* P Q R S T U V W X Y Z [ \ ] ^ _ */ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16, -void * -xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line) -{ - void *newptr = xrealloc_real (ptr, newsize); - if (!ptr) - { - ++malloc_count; - register_ptr (newptr, source_file, source_line); - } - else if (newptr != ptr) - { - unregister_ptr (ptr); - register_ptr (newptr, source_file, source_line); - } - return newptr; -} +/* ` a b c d e f g h i j k l m n o */ + 1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, -char * -xstrdup_debug (const char *s, const char *source_file, int source_line) -{ - char *copy = xstrdup_real (s); - ++malloc_count; - register_ptr (copy, source_file, source_line); - return copy; -} +/* p q r s t u v w x y z { | } ~ DEL */ + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8, + + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 +}; -#endif /* DEBUG_MALLOC */ - /* Utility function: like xstrdup(), but also lowercases S. 
*/ char * @@ -308,30 +216,17 @@ xstrdup_lower (const char *s) char *copy = xstrdup (s); char *p = copy; for (; *p; p++) - *p = TOLOWER (*p); + *p = c_tolower (*p); return copy; } -/* Return a count of how many times CHR occurs in STRING. */ - -int -count_char (const char *string, char chr) -{ - const char *p; - int count = 0; - for (p = string; *p; p++) - if (*p == chr) - ++count; - return count; -} - /* Copy the string formed by two pointers (one on the beginning, other on the char after the last char) to a new, malloc-ed location. 0-terminate it. */ char * strdupdelim (const char *beg, const char *end) { - char *res = (char *)xmalloc (end - beg + 1); + char *res = xmalloc (end - beg + 1); memcpy (res, beg, end - beg); res[end - beg] = '\0'; return res; @@ -354,93 +249,222 @@ sepstring (const char *s) while (*s) { if (*s == ',') - { - res = (char **)xrealloc (res, (i + 2) * sizeof (char *)); - res[i] = strdupdelim (p, s); - res[++i] = NULL; - ++s; - /* Skip the blanks following the ','. */ - while (ISSPACE (*s)) - ++s; - p = s; - } + { + res = xrealloc (res, (i + 2) * sizeof (char *)); + res[i] = strdupdelim (p, s); + res[++i] = NULL; + ++s; + /* Skip the blanks following the ','. */ + while (c_isspace (*s)) + ++s; + p = s; + } else - ++s; + ++s; } - res = (char **)xrealloc (res, (i + 2) * sizeof (char *)); + res = xrealloc (res, (i + 2) * sizeof (char *)); res[i] = strdupdelim (p, s); res[i + 1] = NULL; return res; } -/* Return pointer to a static char[] buffer in which zero-terminated - string-representation of TM (in form hh:mm:ss) is printed. +/* Like sprintf, but prints into a string of sufficient size freshly + allocated with malloc, which is returned. If unable to print due + to invalid format, returns NULL. Inability to allocate needed + memory results in abort, as with xmalloc. This is in spirit + similar to the GNU/BSD extension asprintf, but somewhat easier to + use. - If TM is non-NULL, the current time-in-seconds will be stored - there. 
+ Internally the function either calls vasprintf or loops around + vsnprintf until the correct size is found. Since Wget also ships a + fallback implementation of vsnprintf, this should be portable. */ - (#### This is misleading: one would expect TM would be used instead - of the current time in that case. This design was probably - influenced by the design time(2), and should be changed at some - points. No callers use non-NULL TM anyway.) */ +/* Constant is using for limits memory allocation for text buffer. + Applicable in situation when: vasprintf is not available in the system + and vsnprintf return -1 when long line is truncated (in old versions of + glibc and in other system where C99 doesn`t support) */ + +#define FMT_MAX_LENGTH 1048576 char * -time_str (time_t *tm) -{ - static char output[15]; - struct tm *ptm; - time_t secs = time (tm); +aprintf (const char *fmt, ...) +{ +#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC + /* Use vasprintf. */ + int ret; + va_list args; + char *str; + va_start (args, fmt); + ret = vasprintf (&str, fmt, args); + va_end (args); + if (ret < 0 && errno == ENOMEM) + memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE); /* for consistency + with xmalloc/xrealloc */ + else if (ret < 0) + return NULL; + return str; +#else /* not HAVE_VASPRINTF */ + + /* vasprintf is unavailable. snprintf into a small buffer and + resize it as necessary. */ + int size = 32; + char *str = xmalloc (size); - if (secs == -1) + /* #### This code will infloop and eventually abort in xrealloc if + passed a FMT that causes snprintf to consistently return -1. */ + + while (1) { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? */ - *output = '\0'; - return output; + int n; + va_list args; + + va_start (args, fmt); + n = vsnprintf (str, size, fmt, args); + va_end (args); + + /* If the printing worked, return the string. */ + if (n > -1 && n < size) + return str; + + /* Else try again with a larger buffer. 
*/ + if (n > -1) /* C99 */ + size = n + 1; /* precisely what is needed */ + else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */ + { /* maybe we have some wrong + format string? */ + logprintf (LOG_ALWAYS, + _("%s: aprintf: text buffer is too big (%ld bytes), " + "aborting.\n"), + exec_name, size); /* printout a log message */ + abort (); /* and abort... */ + } + else + { + /* else, we continue to grow our + * buffer: Twice the old size. */ + size <<= 1; + } + str = xrealloc (str, size); } - ptm = localtime (&secs); - sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return output; +#endif /* not HAVE_VASPRINTF */ } -/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ +/* Concatenate the NULL-terminated list of string arguments into + freshly allocated space. */ char * -datetime_str (time_t *tm) +concat_strings (const char *str0, ...) { - static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */ - struct tm *ptm; - time_t secs = time (tm); + va_list args; + int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */ + char *ret, *p; + + const char *next_str; + int total_length = 0; + size_t argcount; + + /* Calculate the length of and allocate the resulting string. */ + + argcount = 0; + va_start (args, str0); + for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) + { + int len = strlen (next_str); + if (argcount < countof (saved_lengths)) + saved_lengths[argcount++] = len; + total_length += len; + } + va_end (args); + p = ret = xmalloc (total_length + 1); - if (secs == -1) + /* Copy the strings into the allocated space. */ + + argcount = 0; + va_start (args, str0); + for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) { - /* In case of error, return the empty string. Maybe we should - just abort if this happens? 
*/ - *output = '\0'; - return output; + int len; + if (argcount < countof (saved_lengths)) + len = saved_lengths[argcount++]; + else + len = strlen (next_str); + memcpy (p, next_str, len); + p += len; } - ptm = localtime (&secs); - sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d", - ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday, - ptm->tm_hour, ptm->tm_min, ptm->tm_sec); + va_end (args); + *p = '\0'; + + return ret; +} + +/* Format the provided time according to the specified format. The + format is a string with format elements supported by strftime. */ + +static char * +fmttime (time_t t, const char *fmt) +{ + static char output[32]; + struct tm *tm = localtime(&t); + if (!tm) + abort (); + if (!strftime(output, sizeof(output), fmt, tm)) + abort (); return output; } + +/* Return pointer to a static char[] buffer in which zero-terminated + string-representation of TM (in form hh:mm:ss) is printed. + + If TM is NULL, the current time will be used. */ + +char * +time_str (time_t t) +{ + return fmttime(t, "%H:%M:%S"); +} + +/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ + +char * +datetime_str (time_t t) +{ + return fmttime(t, "%Y-%m-%d %H:%M:%S"); +} /* The Windows versions of the following two functions are defined in - mswindows.c. */ + mswindows.c. On MSDOS this function should never be called. */ -#ifndef WINDOWS +#ifdef __VMS + +void +fork_to_background (void) +{ + return; +} + +#else /* def __VMS */ + +#if !defined(WINDOWS) && !defined(MSDOS) void fork_to_background (void) { pid_t pid; /* Whether we arrange our own version of opt.lfilename here. 
*/ - int changedp = 0; + bool logfile_changed = false; - if (!opt.lfilename) + if (!opt.lfilename && (!opt.quiet || opt.server_response)) { - opt.lfilename = unique_name (DEFAULT_LOGFILE); - changedp = 1; + /* We must create the file immediately to avoid either a race + condition (which arises from using unique_name and failing to + use fopen_excl) or lying to the user about the log file name + (which arises from using unique_name, printing the name, and + using fopen_excl later on.) */ + FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename); + if (new_log_fp) + { + logfile_changed = true; + fclose (new_log_fp); + } } pid = fork (); if (pid < 0) @@ -452,155 +476,67 @@ fork_to_background (void) else if (pid != 0) { /* parent, no error */ - printf (_("Continuing in background, pid %d.\n"), (int)pid); - if (changedp) - printf (_("Output will be written to `%s'.\n"), opt.lfilename); - exit (0); /* #### should we use _exit()? */ + printf (_("Continuing in background, pid %d.\n"), (int) pid); + if (logfile_changed) + printf (_("Output will be written to %s.\n"), quote (opt.lfilename)); + exit (0); /* #### should we use _exit()? */ } /* child: give up the privileges and keep running. */ setsid (); - freopen ("/dev/null", "r", stdin); - freopen ("/dev/null", "w", stdout); - freopen ("/dev/null", "w", stderr); + if (freopen ("/dev/null", "r", stdin) == NULL) + DEBUGP (("Failed to redirect stdin to /dev/null.\n")); + if (freopen ("/dev/null", "w", stdout) == NULL) + DEBUGP (("Failed to redirect stdout to /dev/null.\n")); + if (freopen ("/dev/null", "w", stderr) == NULL) + DEBUGP (("Failed to redirect stderr to /dev/null.\n")); } -#endif /* not WINDOWS */ - -/* Resolve "." and ".." elements of PATH by destructively modifying - PATH. "." is resolved by removing that path element, and ".." is - resolved by removing the preceding path element. Leading and - trailing slashes are preserved. - - Return non-zero if any changes have been made. 
- - For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive - test examples are provided below. If you change anything in this - function, run test_path_simplify to make sure you haven't broken a - test case. +#endif /* !WINDOWS && !MSDOS */ - A previous version of this function was based on path_simplify() - from GNU Bash, but it has been rewritten for Wget 1.8.1. */ - -int -path_simplify (char *path) -{ - int change = 0; - char *p, *end; - - if (path[0] == '/') - ++path; /* preserve the leading '/'. */ - - p = path; - end = p + strlen (p) + 1; /* position past the terminating zero. */ - - while (1) - { - again: - /* P should point to the beginning of a path element. */ - - if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0')) - { - /* Handle "./foo" by moving "foo" two characters to the - left. */ - if (*(p + 1) == '/') - { - change = 1; - memmove (p, p + 2, end - p); - end -= 2; - goto again; - } - else - { - change = 1; - *p = '\0'; - break; - } - } - else if (*p == '.' && *(p + 1) == '.' - && (*(p + 2) == '/' || *(p + 2) == '\0')) - { - /* Handle "../foo" by moving "foo" one path element to the - left. */ - char *b = p; /* not p-1 because P can equal PATH */ - - /* Backtrack by one path element, but not past the beginning - of PATH. */ - - /* foo/bar/../baz */ - /* ^ p */ - /* ^ b */ - - if (b > path) - { - /* Move backwards until B hits the beginning of the - previous path element or the beginning of path. */ - for (--b; b > path && *(b - 1) != '/'; b--) - ; - } - - change = 1; - if (*(p + 2) == '/') - { - memmove (b, p + 3, end - (p + 3)); - end -= (p + 3) - b; - p = b; - } - else - { - *b = '\0'; - break; - } - - goto again; - } - else if (*p == '/') - { - /* Remove empty path elements. Not mandated by rfc1808 et - al, but empty path elements are not all that useful, and - the rest of Wget might not deal with them well. 
*/ - char *q = p; - while (*q == '/') - ++q; - change = 1; - if (*q == '\0') - { - *p = '\0'; - break; - } - memmove (p, q, end - q); - end -= q - p; - goto again; - } - - /* Skip to the next path element. */ - while (*p && *p != '/') - ++p; - if (*p == '\0') - break; - - /* Make sure P points to the beginning of the next path element, - which is location after the slash. */ - ++p; - } +#endif /* def __VMS [else] */ - return change; -} -/* "Touch" FILE, i.e. make its atime and mtime equal to the time - specified with TM. */ +/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time + specified with TM. The atime ("access time") is set to the current + time. */ + void touch (const char *file, time_t tm) { -#ifdef HAVE_STRUCT_UTIMBUF +#if HAVE_UTIME +# ifdef HAVE_STRUCT_UTIMBUF struct utimbuf times; - times.actime = times.modtime = tm; -#else - time_t times[2]; - times[0] = times[1] = tm; -#endif - +# else + struct { + time_t actime; + time_t modtime; + } times; +# endif + times.modtime = tm; + times.actime = time (NULL); if (utime (file, ×) == -1) logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); +#else + struct timespec timespecs[2]; + int fd; + + fd = open (file, O_WRONLY); + if (fd < 0) + { + logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno)); + return; + } + + timespecs[0].tv_sec = time (NULL); + timespecs[0].tv_nsec = 0L; + timespecs[1].tv_sec = tm; + timespecs[1].tv_nsec = 0L; + + if (futimens (fd, timespecs) == -1) + logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno)); + + close (fd); +#endif } /* Checks if FILE is a symbolic link, and removes it if it is. 
Does @@ -609,15 +545,15 @@ int remove_link (const char *file) { int err = 0; - struct stat st; + struct_stat st; if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode)) { DEBUGP (("Unlinking %s (symlink).\n", file)); err = unlink (file); if (err != 0) - logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"), - file, strerror (errno)); + logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"), + quote (file), strerror (errno)); } return err; } @@ -629,63 +565,245 @@ remove_link (const char *file) proper way should, of course, be to have a third, error state, other than true/false, but that would introduce uncalled-for additional complexity to the callers. */ -int +bool file_exists_p (const char *filename) { #ifdef HAVE_ACCESS return access (filename, F_OK) >= 0; #else - struct stat buf; + struct_stat buf; return stat (filename, &buf) >= 0; #endif } /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file). Returns 0 on error. */ -int +bool file_non_directory_p (const char *path) { - struct stat buf; + struct_stat buf; /* Use lstat() rather than stat() so that symbolic links pointing to directories can be identified correctly. */ if (lstat (path, &buf) != 0) - return 0; - return S_ISDIR (buf.st_mode) ? 0 : 1; + return false; + return S_ISDIR (buf.st_mode) ? false : true; +} + +/* Return the size of file named by FILENAME, or -1 if it cannot be + opened or seeked into. */ +wgint +file_size (const char *filename) +{ +#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO) + wgint size; + /* We use fseek rather than stat to determine the file size because + that way we can also verify that the file is readable without + explicitly checking for permissions. Inspired by the POST patch + by Arnaud Wylie. 
*/ + FILE *fp = fopen (filename, "rb"); + if (!fp) + return -1; + fseeko (fp, 0, SEEK_END); + size = ftello (fp); + fclose (fp); + return size; +#else + struct_stat st; + if (stat (filename, &st) < 0) + return -1; + return st.st_size; +#endif } -/* Return a unique filename, given a prefix and count */ +/* 2005-02-19 SMS. + If no UNIQ_SEP is defined (as on VMS), have unique_name() return the + original name. With the VMS file systems' versioning, everything + should be fine, and appending ".NN" just causes trouble. +*/ + +#ifdef UNIQ_SEP + +/* stat file names named PREFIX.1, PREFIX.2, etc., until one that + doesn't exist is found. Return a freshly allocated copy of the + unused file name. */ + static char * -unique_name_1 (const char *fileprefix, int count) +unique_name_1 (const char *prefix) +{ + int count = 1; + int plen = strlen (prefix); + char *template = (char *)alloca (plen + 1 + 24); + char *template_tail = template + plen; + + memcpy (template, prefix, plen); + *template_tail++ = UNIQ_SEP; + + do + number_to_string (template_tail, count++); + while (file_exists_p (template)); + + return xstrdup (template); +} + +/* Return a unique file name, based on FILE. + + More precisely, if FILE doesn't exist, it is returned unmodified. + If not, FILE.1 is tried, then FILE.2, etc. The first FILE. + file name that doesn't exist is returned. + + 2005-02-19 SMS. "." is now UNIQ_SEP, and may be different. + + The resulting file is not created, only verified that it didn't + exist at the point in time when the function was called. + Therefore, where security matters, don't rely that the file created + by this function exists until you open it with O_EXCL or + equivalent. + + If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated + string. Otherwise, it may return FILE if the file doesn't exist + (and therefore doesn't need changing). 
*/ + +char * +unique_name (const char *file, bool allow_passthrough) +{ + /* If the FILE itself doesn't exist, return it without + modification. */ + if (!file_exists_p (file)) + return allow_passthrough ? (char *)file : xstrdup (file); + + /* Otherwise, find a numeric suffix that results in unused file name + and return it. */ + return unique_name_1 (file); +} + +#else /* def UNIQ_SEP */ + +/* Dummy unique_name() for VMS. Return the original name as easily as + possible. +*/ +char * +unique_name (const char *file, bool allow_passthrough) { - char *filename; + /* Return the FILE itself, without modification, irregardful. */ + return allow_passthrough ? (char *)file : xstrdup (file); +} + +#endif /* def UNIQ_SEP [else] */ + +/* Create a file based on NAME, except without overwriting an existing + file with that name. Providing O_EXCL is correctly implemented, + this function does not have the race condition associated with + opening the file returned by unique_name. */ - if (count) +FILE * +unique_create (const char *name, bool binary, char **opened_name) +{ + /* unique file name, based on NAME */ + char *uname = unique_name (name, false); + FILE *fp; + while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST) { - filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2); - sprintf (filename, "%s.%d", fileprefix, count); + xfree (uname); + uname = unique_name (name, false); } - else - filename = xstrdup (fileprefix); - - if (!file_exists_p (filename)) - return filename; - else + if (opened_name) { - xfree (filename); - return NULL; + if (fp) + *opened_name = uname; + else + { + *opened_name = NULL; + xfree (uname); + } } + else + xfree (uname); + return fp; } -/* Return a unique file name, based on PREFIX. */ -char * -unique_name (const char *prefix) +/* Open the file for writing, with the addition that the file is + opened "exclusively". 
This means that, if the file already exists, + this function will *fail* and errno will be set to EEXIST. If + BINARY is set, the file will be opened in binary mode, equivalent + to fopen's "wb". + + If opening the file fails for any reason, including the file having + previously existed, this function returns NULL and sets errno + appropriately. */ + +FILE * +fopen_excl (const char *fname, int binary) { - char *file = NULL; - int count = 0; + int fd; +#ifdef O_EXCL + +/* 2005-04-14 SMS. + VMS lacks O_BINARY, but makes up for it in weird and wonderful ways. + It also has file versions which obviate all the O_EXCL effort. + O_TRUNC (something of a misnomer) requests a new version. +*/ +# ifdef __VMS +/* Common open() optional arguments: + sequential access only, access callback function. +*/ +# define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id + + int open_id; + int flags = O_WRONLY | O_CREAT | O_TRUNC; + + if (binary > 1) + { + open_id = 11; + fd = open( fname, /* File name. */ + flags, /* Flags. */ + 0777, /* Mode for default protection. */ + "ctx=bin,stm", /* Binary, stream access. */ + "rfm=stmlf", /* Stream_LF. */ + OPEN_OPT_ARGS); /* Access callback. */ + } + else if (binary) + { + open_id = 12; + fd = open( fname, /* File name. */ + flags, /* Flags. */ + 0777, /* Mode for default protection. */ + "ctx=bin,stm", /* Binary, stream access. */ + "rfm=fix", /* Fixed-length, */ + "mrs=512", /* 512-byte records. */ + OPEN_OPT_ARGS); /* Access callback. */ + } + else + { + open_id = 13; + fd = open( fname, /* File name. */ + flags, /* Flags. */ + 0777, /* Mode for default protection. */ + "rfm=stmlf", /* Stream_LF. */ + OPEN_OPT_ARGS); /* Access callback. 
*/ + } +# else /* def __VMS */ + int flags = O_WRONLY | O_CREAT | O_EXCL; +# ifdef O_BINARY + if (binary) + flags |= O_BINARY; +# endif + fd = open (fname, flags, 0666); +# endif /* def __VMS [else] */ - while (!file) - file = unique_name_1 (prefix, count++); - return file; + if (fd < 0) + return NULL; + return fdopen (fd, binary ? "wb" : "w"); +#else /* not O_EXCL */ + /* Manually check whether the file exists. This is prone to race + conditions, but systems without O_EXCL haven't deserved + better. */ + if (file_exists_p (fname)) + { + errno = EEXIST; + return NULL; + } + return fopen (fname, binary ? "wb" : "w"); +#endif /* not O_EXCL */ } /* Create DIRECTORY. If some of the pathname components of DIRECTORY @@ -697,8 +815,7 @@ unique_name (const char *prefix) int make_directory (const char *directory) { - int quit = 0; - int i; + int i, ret, quit = 0; char *dir; /* Make a copy of dir, to be able to write to it. Otherwise, the @@ -710,22 +827,23 @@ make_directory (const char *directory) for (i = (*dir == '/'); 1; ++i) { for (; dir[i] && dir[i] != '/'; i++) - ; + ; if (!dir[i]) - quit = 1; + quit = 1; dir[i] = '\0'; - /* Check whether the directory already exists. */ + /* Check whether the directory already exists. Allow creation of + intermediate directories to fail, as the initial path components + are not necessarily directories! */ if (!file_exists_p (dir)) - { - if (mkdir (dir, 0777) < 0) - return -1; - } + ret = mkdir (dir, 0777); + else + ret = 0; if (quit) - break; + break; else - dir[i] = '/'; + dir[i] = '/'; } - return 0; + return ret; } /* Merge BASE with FILE. BASE can be a directory or a file name, FILE @@ -735,7 +853,7 @@ make_directory (const char *directory) file_merge("/foo/bar/", "baz") => "/foo/bar/baz" file_merge("foo", "bar") => "bar" - In other words, it's a simpler and gentler version of uri_merge_1. */ + In other words, it's a simpler and gentler version of uri_merge.
*/ char * file_merge (const char *base, const char *file) @@ -746,7 +864,7 @@ file_merge (const char *base, const char *file) if (!cut) return xstrdup (file); - result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1); + result = xmalloc (cut - base + 1 + strlen (file) + 1); memcpy (result, base, cut - base); result[cut - base] = '/'; strcpy (result + (cut - base) + 1, file); @@ -754,141 +872,202 @@ file_merge (const char *base, const char *file) return result; } -static int in_acclist PARAMS ((const char *const *, const char *, int)); +/* Like fnmatch, but performs a case-insensitive match. */ + +int +fnmatch_nocase (const char *pattern, const char *string, int flags) +{ +#ifdef FNM_CASEFOLD + /* The FNM_CASEFOLD flag started as a GNU extension, but it is now + also present on *BSD platforms, and possibly elsewhere. */ + return fnmatch (pattern, string, flags | FNM_CASEFOLD); +#else + /* Turn PATTERN and STRING to lower case and call fnmatch on them. */ + char *patcopy = (char *) alloca (strlen (pattern) + 1); + char *strcopy = (char *) alloca (strlen (string) + 1); + char *p; + for (p = patcopy; *pattern; pattern++, p++) + *p = c_tolower (*pattern); + *p = '\0'; + for (p = strcopy; *string; string++, p++) + *p = c_tolower (*string); + *p = '\0'; + return fnmatch (patcopy, strcopy, flags); +#endif +} + +static bool in_acclist (const char *const *, const char *, bool); /* Determine whether a file is acceptable to be followed, according to lists of patterns to accept/reject. 
*/ -int +bool acceptable (const char *s) { - int l = strlen (s); + const char *p; + + if (opt.output_document && strcmp (s, opt.output_document) == 0) + return true; + + if ((p = strrchr (s, '/'))) + s = p + 1; - while (l && s[l] != '/') - --l; - if (s[l] == '/') - s += (l + 1); if (opt.accepts) { if (opt.rejects) - return (in_acclist ((const char *const *)opt.accepts, s, 1) - && !in_acclist ((const char *const *)opt.rejects, s, 1)); + return (in_acclist ((const char *const *)opt.accepts, s, true) + && !in_acclist ((const char *const *)opt.rejects, s, true)); else - return in_acclist ((const char *const *)opt.accepts, s, 1); + return in_acclist ((const char *const *)opt.accepts, s, true); } else if (opt.rejects) - return !in_acclist ((const char *const *)opt.rejects, s, 1); - return 1; + return !in_acclist ((const char *const *)opt.rejects, s, true); + + return true; } -/* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is - `/something', frontcmp() will return 1 only if S2 begins with - `/something'. Otherwise, 0 is returned. */ -int -frontcmp (const char *s1, const char *s2) +/* Determine whether an URL is acceptable to be followed, according to + regex patterns to accept/reject. */ +bool +accept_url (const char *s) { - for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2); - return !*s1; + if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s)) + return false; + if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s)) + return false; + + return true; } -/* Iterate through STRLIST, and return the first element that matches - S, through wildcards or front comparison (as appropriate). */ -static char * -proclist (char **strlist, const char *s, enum accd flags) +/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() + will return true if and only if D2 begins with `/something/' or is exactly + '/something'. 
*/ +bool +subdir_p (const char *d1, const char *d2) +{ + if (*d1 == '\0') + return true; + if (!opt.ignore_case) + for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2) + ; + else + for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2) + ; + + return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); +} + +/* Iterate through DIRLIST (which must be NULL-terminated), and return the + first element that matches DIR, through wildcards or front comparison (as + appropriate). */ +static bool +dir_matches_p (const char **dirlist, const char *dir) { - char **x; + const char **x; + int (*matcher) (const char *, const char *, int) + = opt.ignore_case ? fnmatch_nocase : fnmatch; - for (x = strlist; *x; x++) - if (has_wildcards_p (*x)) - { - if (fnmatch (*x, s, FNM_PATHNAME) == 0) - break; - } - else - { - char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */ - if (frontcmp (p, s)) - break; - } - return *x; + for (x = dirlist; *x; x++) + { + /* Remove leading '/' */ + const char *p = *x + (**x == '/'); + if (has_wildcards_p (p)) + { + if (matcher (p, dir, FNM_PATHNAME) == 0) + break; + } + else + { + if (subdir_p (p, dir)) + break; + } + } + + return *x ? true : false; } /* Returns whether DIRECTORY is acceptable for download, wrt the include/exclude lists. - If FLAGS is ALLABS, the leading `/' is ignored in paths; relative - and absolute paths may be freely intermixed. */ -int -accdir (const char *directory, enum accd flags) + The leading `/' is ignored in paths; relative and absolute paths + may be freely intermixed. */ + +bool +accdir (const char *directory) { /* Remove starting '/'. 
*/ - if (flags & ALLABS && *directory == '/') + if (*directory == '/') ++directory; if (opt.includes) { - if (!proclist (opt.includes, directory, flags)) - return 0; + if (!dir_matches_p (opt.includes, directory)) + return false; } if (opt.excludes) { - if (proclist (opt.excludes, directory, flags)) - return 0; + if (dir_matches_p (opt.excludes, directory)) + return false; } - return 1; + return true; } -/* Match the end of STRING against PATTERN. For instance: +/* Return true if STRING ends with TAIL. For instance: - match_backwards ("abc", "bc") -> 1 - match_backwards ("abc", "ab") -> 0 - match_backwards ("abc", "abc") -> 1 */ -int -match_tail (const char *string, const char *pattern) + match_tail ("abc", "bc", false) -> 1 + match_tail ("abc", "ab", false) -> 0 + match_tail ("abc", "abc", false) -> 1 + + If FOLD_CASE is true, the comparison will be case-insensitive. */ + +bool +match_tail (const char *string, const char *tail, bool fold_case) { - int i, j; + int pos = strlen (string) - strlen (tail); - for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--) - if (string[i] != pattern[j]) - break; - /* If the pattern was exhausted, the match was succesful. */ - if (j == -1) - return 1; + if (pos < 0) + return false; /* tail is longer than string. */ + + if (!fold_case) + return !strcmp (string + pos, tail); else - return 0; + return !strcasecmp (string + pos, tail); } /* Checks whether string S matches each element of ACCEPTS. A list element are matched either with fnmatch() or match_tail(), according to whether the element contains wildcards or not. - If the BACKWARD is 0, don't do backward comparison -- just compare + If the BACKWARD is false, don't do backward comparison -- just compare them normally. 
*/ -static int -in_acclist (const char *const *accepts, const char *s, int backward) +static bool +in_acclist (const char *const *accepts, const char *s, bool backward) { for (; *accepts; accepts++) { if (has_wildcards_p (*accepts)) - { - /* fnmatch returns 0 if the pattern *does* match the - string. */ - if (fnmatch (*accepts, s, 0) == 0) - return 1; - } + { + int res = opt.ignore_case + ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0); + /* fnmatch returns 0 if the pattern *does* match the string. */ + if (res == 0) + return true; + } else - { - if (backward) - { - if (match_tail (s, *accepts)) - return 1; - } - else - { - if (!strcmp (s, *accepts)) - return 1; - } - } + { + if (backward) + { + if (match_tail (s, *accepts, opt.ignore_case)) + return true; + } + else + { + int cmp = opt.ignore_case + ? strcasecmp (s, *accepts) : strcmp (s, *accepts); + if (cmp == 0) + return true; + } + } } - return 0; + return false; } /* Return the location of STR's suffix (file extension). Examples: @@ -899,97 +1078,79 @@ in_acclist (const char *const *accepts, const char *s, int backward) char * suffix (const char *str) { - int i; + char *p; - for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--) - ; + if ((p = strrchr (str, '.')) && !strchr (p + 1, '/')) + return p + 1; - if (str[i++] == '.') - return (char *)str + i; - else - return NULL; + return NULL; } -/* Read a line from FP and return the pointer to freshly allocated - storage. The stoarage space is obtained through malloc() and - should be freed with free() when it is no longer needed. +/* Return true if S contains globbing wildcards (`*', `?', `[' or + `]'). */ - The length of the line is not limited, except by available memory. - The newline character at the end of line is retained. The line is - terminated with a zero character. +bool +has_wildcards_p (const char *s) +{ + return !!strpbrk (s, "*?[]"); +} - After end-of-file is encountered without anything being read, NULL - is returned. 
NULL is also returned on error. To distinguish - between these two cases, use the stdio function ferror(). */ +/* Return true if FNAME ends with a typical HTML suffix. The + following (case-insensitive) suffixes are presumed to be HTML + files: -char * -read_whole_line (FILE *fp) + html + htm + ?html (`?' matches one character) + + #### CAVEAT. This is not necessarily a good indication that FNAME + refers to a file that contains HTML! */ +bool +has_html_suffix_p (const char *fname) { - int length = 0; - int bufsize = 82; - char *line = (char *)xmalloc (bufsize); + char *suf; - while (fgets (line + length, bufsize - length, fp)) - { - length += strlen (line + length); - if (length == 0) - /* Possible for example when reading from a binary file where - a line begins with \0. */ - continue; - - if (line[length - 1] == '\n') - break; - - /* fgets() guarantees to read the whole line, or to use up the - space we've given it. We can double the buffer - unconditionally. */ - bufsize <<= 1; - line = xrealloc (line, bufsize); - } - if (length == 0 || ferror (fp)) - { - xfree (line); - return NULL; - } - if (length + 1 < bufsize) - /* Relieve the memory from our exponential greediness. We say - `length + 1' because the terminating \0 is not included in - LENGTH. We don't need to zero-terminate the string ourselves, - though, because fgets() does that. */ - line = xrealloc (line, length + 1); - return line; + if ((suf = suffix (fname)) == NULL) + return false; + if (!strcasecmp (suf, "html")) + return true; + if (!strcasecmp (suf, "htm")) + return true; + if (suf[0] && !strcasecmp (suf + 1, "html")) + return true; + return false; } - + /* Read FILE into memory. A pointer to `struct file_memory' are returned; use struct element `content' to access file contents, and the element `length' to know the file length. `content' is *not* zero-terminated, and you should *not* read or write beyond the [0, length) range of characters. 
- After you are done with the file contents, call read_file_free to + After you are done with the file contents, call wget_read_file_free to release the memory. Depending on the operating system and the type of file that is - being read, read_file() either mmap's the file into memory, or + being read, wget_read_file() either mmap's the file into memory, or reads the file into the core using read(). If file is named "-", fileno(stdin) is used for reading instead. If you want to read from a real file named "-", use "./-" instead. */ struct file_memory * -read_file (const char *file) +wget_read_file (const char *file) { int fd; struct file_memory *fm; long size; - int inhibit_close = 0; + bool inhibit_close = false; /* Some magic in the finest tradition of Perl and its kin: if FILE is "-", just use stdin. */ if (HYPHENP (file)) { fd = fileno (stdin); - inhibit_close = 1; + inhibit_close = true; /* Note that we don't inhibit mmap() in this case. If stdin is redirected from a regular file, mmap() will still work. */ } @@ -997,11 +1158,11 @@ read_file (const char *file) fd = open (file, O_RDONLY); if (fd < 0) return NULL; - fm = xmalloc (sizeof (struct file_memory)); + fm = xnew (struct file_memory); #ifdef HAVE_MMAP { - struct stat buf; + struct_fstat buf; if (fstat (fd, &buf) < 0) goto mmap_lose; fm->length = buf.st_size; @@ -1010,7 +1171,7 @@ read_file (const char *file) specify PROT_READ and MAP_SHARED for a marginal gain in efficiency, but at some cost to generality. */ fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE, - MAP_PRIVATE, fd, 0); + MAP_PRIVATE, fd, 0); if (fm->content == (char *)MAP_FAILED) goto mmap_lose; if (!inhibit_close) @@ -1028,41 +1189,41 @@ read_file (const char *file) #endif /* HAVE_MMAP */ fm->length = 0; - size = 512; /* number of bytes fm->contents can + size = 512; /* number of bytes fm->contents can hold at any given time. 
*/ fm->content = xmalloc (size); while (1) { - long nread; + wgint nread; if (fm->length > size / 2) - { - /* #### I'm not sure whether the whole exponential-growth + { + /* #### I'm not sure whether the whole exponential-growth thing makes sense with kernel read. On Linux at least, read() refuses to read more than 4K from a file at a single chunk anyway. But other Unixes might optimize it better, and it doesn't *hurt* anything, so I'm leaving it. */ - /* Normally, we grow SIZE exponentially to make the number + /* Normally, we grow SIZE exponentially to make the number of calls to read() and realloc() logarithmic in relation to file size. However, read() can read an amount of data - smaller than requested, and it would be unreasonably to + smaller than requested, and it would be unreasonable to double SIZE every time *something* was read. Therefore, we double SIZE only when the length exceeds half of the entire allocated size. */ - size <<= 1; - fm->content = xrealloc (fm->content, size); - } + size <<= 1; + fm->content = xrealloc (fm->content, size); + } nread = read (fd, fm->content + fm->length, size - fm->length); if (nread > 0) - /* Successful read. */ - fm->length += nread; + /* Successful read. */ + fm->length += nread; else if (nread < 0) - /* Error. */ - goto lose; + /* Error. */ + goto lose; else - /* EOF */ - break; + /* EOF */ + break; } if (!inhibit_close) close (fd); @@ -1087,7 +1248,7 @@ read_file (const char *file) memory needed to hold the FM structure itself. */ void -read_file_free (struct file_memory *fm) +wget_read_file_free (struct file_memory *fm) { #ifdef HAVE_MMAP if (fm->mmap_p) @@ -1111,7 +1272,7 @@ free_vec (char **vec) { char **p = vec; while (*p) - xfree (*p++); + xfree (*p++); xfree (vec); } } @@ -1135,99 +1296,40 @@ merge_vecs (char **v1, char **v2) return v1; } /* Count v1. */ - for (i = 0; v1[i]; i++); + for (i = 0; v1[i]; i++) + ; /* Count v2. */ - for (j = 0; v2[j]; j++); + for (j = 0; v2[j]; j++) + ; /* Reallocate v1. 
*/ - v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **)); + v1 = xrealloc (v1, (i + j + 1) * sizeof (char **)); memcpy (v1 + i, v2, (j + 1) * sizeof (char *)); xfree (v2); return v1; } -/* A set of simple-minded routines to store strings in a linked list. - This used to also be used for searching, but now we have hash - tables for that. */ - -/* It's a shame that these simple things like linked lists and hash - tables (see hash.c) need to be implemented over and over again. It - would be nice to be able to use the routines from glib -- see - www.gtk.org for details. However, that would make Wget depend on - glib, and I want to avoid dependencies to external libraries for - reasons of convenience and portability (I suspect Wget is more - portable than anything ever written for Gnome). */ - -/* Append an element to the list. If the list has a huge number of - elements, this can get slow because it has to find the list's - ending. If you think you have to call slist_append in a loop, - think about calling slist_prepend() followed by slist_nreverse(). */ - -slist * -slist_append (slist *l, const char *s) -{ - slist *newel = (slist *)xmalloc (sizeof (slist)); - slist *beg = l; - - newel->string = xstrdup (s); - newel->next = NULL; - - if (!l) - return newel; - /* Find the last element. */ - while (l->next) - l = l->next; - l->next = newel; - return beg; -} - -/* Prepend S to the list. Unlike slist_append(), this is O(1). */ - -slist * -slist_prepend (slist *l, const char *s) -{ - slist *newel = (slist *)xmalloc (sizeof (slist)); - newel->string = xstrdup (s); - newel->next = l; - return newel; -} - -/* Destructively reverse L. */ +/* Append a freshly allocated copy of STR to VEC. If VEC is NULL, it + is allocated as needed. Return the new value of the vector. 
*/ -slist * -slist_nreverse (slist *l) -{ - slist *prev = NULL; - while (l) - { - slist *next = l->next; - l->next = prev; - prev = l; - l = next; - } - return prev; -} - -/* Is there a specific entry in the list? */ -int -slist_contains (slist *l, const char *s) -{ - for (; l; l = l->next) - if (!strcmp (l->string, s)) - return 1; - return 0; -} - -/* Free the whole slist. */ -void -slist_free (slist *l) +char ** +vec_append (char **vec, const char *str) { - while (l) + int cnt; /* count of vector elements, including + the one we're about to append */ + if (vec != NULL) { - slist *n = l->next; - xfree (l->string); - xfree (l); - l = n; + for (cnt = 0; vec[cnt]; cnt++) + ; + ++cnt; } + else + cnt = 1; + /* Reallocate the array to fit the new element and the NULL. */ + vec = xrealloc (vec, (cnt + 1) * sizeof (char *)); + /* Append a copy of STR to the vector. */ + vec[cnt - 1] = xstrdup (str); + vec[cnt] = NULL; + return vec; } /* Sometimes it's useful to create "sets" of strings, i.e. special @@ -1259,247 +1361,358 @@ string_set_contains (struct hash_table *ht, const char *s) return hash_table_contains (ht, s); } -static int -string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored) +/* Convert the specified string set to array. ARRAY should be large + enough to hold hash_table_count(ht) char pointers. */ + +void string_set_to_array (struct hash_table *ht, char **array) { - xfree (key); - return 0; + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + *array++ = iter.key; } +/* Free the string set. This frees both the storage allocated for + keys and the actual hash table. (hash_table_destroy would only + destroy the hash table.) 
*/ + void string_set_free (struct hash_table *ht) { - hash_table_map (ht, string_set_free_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + xfree (iter.key); hash_table_destroy (ht); } -static int -free_keys_and_values_mapper (void *key, void *value, void *arg_ignored) -{ - xfree (key); - xfree (value); - return 0; -} - -/* Another utility function: call free() on all keys and values of HT. */ +/* Utility function: simply call xfree() on all keys and values of HT. */ void free_keys_and_values (struct hash_table *ht) { - hash_table_map (ht, free_keys_and_values_mapper, NULL); + hash_table_iterator iter; + for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) + { + xfree (iter.key); + xfree (iter.value); + } } - -/* Engine for legible and legible_very_long; this function works on - strings. */ +/* Get digit grouping data for thousand separators by calling + localeconv(). The data includes separator string and grouping info + and is cached after the first call to the function. -static char * -legible_1 (const char *repr) -{ - static char outbuf[128]; - int i, i1, mod; - char *outptr; - const char *inptr; - - /* Reset the pointers. */ - outptr = outbuf; - inptr = repr; - /* If the number is negative, shift the pointers. */ - if (*inptr == '-') - { - *outptr++ = '-'; - ++inptr; - } - /* How many digits before the first separator? */ - mod = strlen (inptr) % 3; - /* Insert them. */ - for (i = 0; i < mod; i++) - *outptr++ = inptr[i]; - /* Now insert the rest of them, putting separator before every - third digit. */ - for (i1 = i, i = 0; inptr[i1]; i++, i1++) + In locales that don't set a thousand separator (such as the "C" + locale), this forces it to be ",". We are now only showing + thousand separators in one place, so this shouldn't be a problem in + practice.
*/ + +static void +get_grouping_data (const char **sep, const char **grouping) +{ + static const char *cached_sep; + static const char *cached_grouping; + static bool initialized; + if (!initialized) { - if (i % 3 == 0 && i1 != 0) - *outptr++ = ','; - *outptr++ = inptr[i1]; + /* Get the grouping info from the locale. */ + struct lconv *lconv = localeconv (); + cached_sep = lconv->thousands_sep; + cached_grouping = lconv->grouping; +#if ! USE_NLS_PROGRESS_BAR + /* We can't count column widths, so ensure that the separator + * is single-byte only (let check below determine what byte). */ + if (strlen(cached_sep) > 1) + cached_sep = ""; +#endif + if (!*cached_sep) + { + /* Many locales (such as "C" or "hr_HR") don't specify + grouping, which we still want to use it for legibility. + In those locales set the sep char to ',', unless that + character is used for decimal point, in which case set it + to ".". */ + if (*lconv->decimal_point != ',') + cached_sep = ","; + else + cached_sep = "."; + cached_grouping = "\x03"; + } + initialized = true; } - /* Zero-terminate the string. */ - *outptr = '\0'; - return outbuf; + *sep = cached_sep; + *grouping = cached_grouping; } -/* Legible -- return a static pointer to the legibly printed long. */ -char * -legible (long l) +/* Return a printed representation of N with thousand separators. + This should respect locale settings, with the exception of the "C" + locale which mandates no separator, but we use one anyway. + + Unfortunately, we cannot use %'d (in fact it would be %'j) to get + the separators because it's too non-portable, and it's hard to test + for this feature at configure time. Besides, it wouldn't display + separators in the "C" locale, still used by many Unix users. */ + +const char * +with_thousand_seps (wgint n) { - char inbuf[24]; - /* Print the number into the buffer. 
*/ - number_to_string (inbuf, l); - return legible_1 (inbuf); -} + static char outbuf[48]; + char *p = outbuf + sizeof outbuf; -/* Write a string representation of NUMBER into the provided buffer. - We cannot use sprintf() because we cannot be sure whether the - platform supports printing of what we chose for VERY_LONG_TYPE. + /* Info received from locale */ + const char *grouping, *sep; + int seplen; - Example: Gcc supports `long long' under many platforms, but on many - of those the native libc knows nothing of it and therefore cannot - print it. + /* State information */ + int i = 0, groupsize; + const char *atgroup; - How long BUFFER needs to be depends on the platform and the content - of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24 - bytes are sufficient. Using more might be a good idea. + bool negative = n < 0; - This function does not go through the hoops that long_to_string - goes to because it doesn't aspire to be fast. (It's called perhaps - once in a Wget run.) */ + /* Initialize grouping data. */ + get_grouping_data (&sep, &grouping); + seplen = strlen (sep); + atgroup = grouping; + groupsize = *atgroup++; -static void -very_long_to_string (char *buffer, VERY_LONG_TYPE number) -{ - int i = 0; - int j; + /* This would overflow on WGINT_MIN, but printing negative numbers + is not an important goal of this function. */ + if (negative) + n = -n; - /* Print the number backwards... */ - do + /* Write the number into the buffer, backwards, inserting the + separators as necessary. */ + *--p = '\0'; + while (1) { - buffer[i++] = '0' + number % 10; - number /= 10; + *--p = n % 10 + '0'; + n /= 10; + if (n == 0) + break; + /* Prepend SEP to every groupsize'd digit and get new groupsize. */ + if (++i == groupsize) + { + if (seplen == 1) + *--p = *sep; + else + memcpy (p -= seplen, sep, seplen); + i = 0; + if (*atgroup) + groupsize = *atgroup++; + } } - while (number); + if (negative) + *--p = '-'; - /* ...and reverse the order of the digits.
*/ - for (j = 0; j < i / 2; j++) - { - char c = buffer[j]; - buffer[j] = buffer[i - 1 - j]; - buffer[i - 1 - j] = c; - } - buffer[i] = '\0'; + return p; } -/* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */ +/* N, a byte quantity, is converted to a human-readable abbreviated + form a la sizes printed by `ls -lh'. The result is written to a + static buffer, a pointer to which is returned. + + Unlike `with_thousand_seps', this approximates to the nearest unit. + Quoting GNU libit: "Most people visually process strings of 3-4 + digits effectively, but longer strings of digits are more prone to + misinterpretation. Hence, converting to an abbreviated form + usually improves readability." + + This intentionally uses kilobyte (KB), megabyte (MB), etc. in their + original computer-related meaning of "powers of 1024". We don't + use the "*bibyte" names invented in 1998, and seldom used in + practice. Wikipedia's entry on "binary prefix" discusses this in + some detail. */ + char * -legible_very_long (VERY_LONG_TYPE l) +human_readable (HR_NUMTYPE n, const int acc, const int decimals) { - char inbuf[128]; - /* Print the number into the buffer. */ - very_long_to_string (inbuf, l); - return legible_1 (inbuf); + /* These suffixes are compatible with those of GNU `ls -lh'. */ + static char powers[] = + { + 'K', /* kilobyte, 2^10 bytes */ + 'M', /* megabyte, 2^20 bytes */ + 'G', /* gigabyte, 2^30 bytes */ + 'T', /* terabyte, 2^40 bytes */ + 'P', /* petabyte, 2^50 bytes */ + 'E', /* exabyte, 2^60 bytes */ + }; + static char buf[8]; + size_t i; + + /* If the quantity is smaller than 1K, just print it. */ + if (n < 1024) + { + snprintf (buf, sizeof (buf), "%d", (int) n); + return buf; + } + + /* Loop over powers, dividing N with 1024 in each iteration. This + works unchanged for all sizes of wgint, while still avoiding + non-portable `long double' arithmetic.
*/ + for (i = 0; i < countof (powers); i++) + { + /* At each iteration N is greater than the *subsequent* power. + That way N/1024.0 produces a decimal number in the units of + *this* power. */ + if ((n / 1024) < 1024 || i == countof (powers) - 1) + { + double val = n / 1024.0; + /* Print values smaller than the accuracy level (acc) with (decimal) + * decimal digits, and others without any decimals. */ + snprintf (buf, sizeof (buf), "%.*f%c", + val < acc ? decimals : 0, val, powers[i]); + return buf; + } + n /= 1024; + } + return NULL; /* unreached */ } -/* Count the digits in a (long) integer. */ +/* Count the digits in the provided number. Used to allocate space + when printing numbers. */ + int -numdigit (long number) +numdigit (wgint number) { int cnt = 1; if (number < 0) - { - number = -number; - ++cnt; - } - while ((number /= 10) > 0) + ++cnt; /* accommodate '-' */ + while ((number /= 10) != 0) ++cnt; return cnt; } -#define ONE_DIGIT(figure) *p++ = n / (figure) + '0' -#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure)) +#define PR(mask) *p++ = n / (mask) + '0' + +/* DIGITS_<D> is used to print a D-digit number and should be called + with mask==10^(D-1). It prints n/mask (the first digit), reducing + n to n%mask (the remaining digits), and calling DIGITS_<D-1>. + Recursively this continues until DIGITS_1 is invoked.
*/ + +#define DIGITS_1(mask) PR (mask) +#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10) +#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10) +#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10) +#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10) +#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10) +#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10) +#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10) +#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10) +#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10) -#define DIGITS_1(figure) ONE_DIGIT (figure) -#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10) -#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10) -#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10) -#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10) -#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10) -#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10) -#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10) -#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10) -#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10) +/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */ -/* DIGITS_<11-20> are only used on machines with 64-bit longs. 
*/ +#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10) +#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10) +#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10) +#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10) +#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10) +#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10) +#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10) +#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10) +#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10) -#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10) -#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10) -#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10) -#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10) -#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10) -#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10) -#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10) -#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10) -#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10) +/* Shorthand for casting to wgint. */ +#define W wgint -/* Print NUMBER to BUFFER in base 10. This should be completely - equivalent to `sprintf(buffer, "%ld", number)', only much faster. +/* Print NUMBER to BUFFER in base 10. This is equivalent to + `sprintf(buffer, "%lld", (long long) number)', only typically much + faster and portable to machines without long long. The speedup may make a difference in programs that frequently convert numbers to strings. Some implementations of sprintf, - particularly the one in GNU libc, have been known to be extremely - slow compared to this function. 
+ particularly the one in some versions of GNU libc, have been known + to be quite slow when converting integers to strings. Return the pointer to the location where the terminating zero was printed. (Equivalent to calling buffer+strlen(buffer) after the function is done.) - BUFFER should be big enough to accept as many bytes as you expect - the number to take up. On machines with 64-bit longs the maximum + BUFFER should be large enough to accept as many bytes as you expect + the number to take up. On machines with 64-bit wgints the maximum needed size is 24 bytes. That includes the digits needed for the largest 64-bit number, the `-' sign in case it's negative, and the terminating '\0'. */ char * -number_to_string (char *buffer, long number) +number_to_string (char *buffer, wgint number) { char *p = buffer; - long n = number; + wgint n = number; -#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8) - /* We are running in a strange or misconfigured environment. Let - sprintf cope with it. */ - sprintf (buffer, "%ld", n); - p += strlen (buffer); -#else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */ + int last_digit_char = 0; + +#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8) + /* We are running in a very strange environment. Leave the correct + printing to sprintf. */ + p += sprintf (buf, "%j", (intmax_t) (n)); +#else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ if (n < 0) { + if (n < -WGINT_MAX) + { + /* n = -n would overflow because -n would evaluate to a + wgint value larger than WGINT_MAX. Need to make n + smaller and handle the last digit separately. */ + int last_digit = n % 10; + /* The sign of n%10 is implementation-defined. */ + if (last_digit < 0) + last_digit_char = '0' - last_digit; + else + last_digit_char = '0' + last_digit; + /* After n is made smaller, -n will not overflow. 
*/ + n /= 10; + } + *p++ = '-'; n = -n; } - if (n < 10) { DIGITS_1 (1); } - else if (n < 100) { DIGITS_2 (10); } - else if (n < 1000) { DIGITS_3 (100); } - else if (n < 10000) { DIGITS_4 (1000); } - else if (n < 100000) { DIGITS_5 (10000); } - else if (n < 1000000) { DIGITS_6 (100000); } - else if (n < 10000000) { DIGITS_7 (1000000); } - else if (n < 100000000) { DIGITS_8 (10000000); } - else if (n < 1000000000) { DIGITS_9 (100000000); } -#if SIZEOF_LONG == 4 - /* ``if (1)'' serves only to preserve editor indentation. */ - else if (1) { DIGITS_10 (1000000000); } -#else /* SIZEOF_LONG != 4 */ - else if (n < 10000000000L) { DIGITS_10 (1000000000L); } - else if (n < 100000000000L) { DIGITS_11 (10000000000L); } - else if (n < 1000000000000L) { DIGITS_12 (100000000000L); } - else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); } - else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); } - else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); } - else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); } - else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); } - else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); } - else { DIGITS_19 (1000000000000000000L); } -#endif /* SIZEOF_LONG != 4 */ + /* Use the DIGITS_ macro appropriate for N's number of digits. That + way printing any N is fully open-coded without a loop or jump. + (Also see description of DIGITS_*.) */ + + if (n < 10) DIGITS_1 (1); + else if (n < 100) DIGITS_2 (10); + else if (n < 1000) DIGITS_3 (100); + else if (n < 10000) DIGITS_4 (1000); + else if (n < 100000) DIGITS_5 (10000); + else if (n < 1000000) DIGITS_6 (100000); + else if (n < 10000000) DIGITS_7 (1000000); + else if (n < 100000000) DIGITS_8 (10000000); + else if (n < 1000000000) DIGITS_9 (100000000); +#if SIZEOF_WGINT == 4 + /* wgint is 32 bits wide: no number has more than 10 digits. 
*/ + else DIGITS_10 (1000000000); +#else + /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits. + Constants are constructed by compile-time multiplication to avoid + dealing with different notations for 64-bit constants + (nL/nLL/nI64, depending on the compiler and architecture). */ + else if (n < 10*(W)1000000000) DIGITS_10 (1000000000); + else if (n < 100*(W)1000000000) DIGITS_11 (10*(W)1000000000); + else if (n < 1000*(W)1000000000) DIGITS_12 (100*(W)1000000000); + else if (n < 10000*(W)1000000000) DIGITS_13 (1000*(W)1000000000); + else if (n < 100000*(W)1000000000) DIGITS_14 (10000*(W)1000000000); + else if (n < 1000000*(W)1000000000) DIGITS_15 (100000*(W)1000000000); + else if (n < 10000000*(W)1000000000) DIGITS_16 (1000000*(W)1000000000); + else if (n < 100000000*(W)1000000000) DIGITS_17 (10000000*(W)1000000000); + else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000); + else DIGITS_19 (1000000000*(W)1000000000); +#endif + + if (last_digit_char) + *p++ = last_digit_char; *p = '\0'; -#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */ +#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ return p; } -#undef ONE_DIGIT -#undef ONE_DIGIT_ADVANCE - +#undef PR +#undef W +#undef SPRINTF_WGINT #undef DIGITS_1 #undef DIGITS_2 #undef DIGITS_3 @@ -1519,384 +1732,818 @@ number_to_string (char *buffer, long number) #undef DIGITS_17 #undef DIGITS_18 #undef DIGITS_19 + +#define RING_SIZE 3 + +/* Print NUMBER to a statically allocated string and return a pointer + to the printed representation. + + This function is intended to be used in conjunction with printf. + It is hard to portably print wgint values: + a) you cannot use printf("%ld", number) because wgint can be long + long on 32-bit machines with LFS. + b) you cannot use printf("%lld", number) because NUMBER could be + long on 32-bit machines without LFS, or on 64-bit machines, + which do not require LFS. Also, Windows doesn't support %lld. 
+ c) you cannot use printf("%j", (int_max_t) number) because not all + versions of printf support "%j", the most notable being the one + on Windows. + d) you cannot #define WGINT_FMT to the appropriate format and use + printf(WGINT_FMT, number) because that would break translations + for user-visible messages, such as printf("Downloaded: %d + bytes\n", number). + + What you should use instead is printf("%s", number_to_static_string + (number)). + + CAVEAT: since the function returns pointers to static data, you + must be careful to copy its result before calling it again. + However, to make it more useful with printf, the function maintains + an internal ring of static buffers to return. That way things like + printf("%s %s", number_to_static_string (num1), + number_to_static_string (num2)) work as expected. Three buffers + are currently used, which means that "%s %s %s" will work, but "%s + %s %s %s" won't. If you need to print more than three wgints, + bump the RING_SIZE (or rethink your message.) */ + +char * +number_to_static_string (wgint number) +{ + static char ring[RING_SIZE][24]; + static int ringpos; + char *buf = ring[ringpos]; + number_to_string (buf, number); + ringpos = (ringpos + 1) % RING_SIZE; + return buf; +} + +/* Converts the byte to bits format if --report-bps option is enabled + */ +wgint +convert_to_bits (wgint num) +{ + if (opt.report_bps) + return num * 8; + return num; +} + -/* Support for timers. */ - -#undef TIMER_WINDOWS -#undef TIMER_GETTIMEOFDAY -#undef TIMER_TIME - -/* Depending on the OS and availability of gettimeofday(), one and - only one of the above constants will be defined. Virtually all - modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will - use TIMER_WINDOWS. TIMER_TIME is a catch-all method for - non-Windows systems without gettimeofday. - - #### Perhaps we should also support ftime(), which exists on old - BSD 4.2-influenced systems? (It also existed under MS DOS Borland - C, if memory serves me.) 
*/ - -#ifdef WINDOWS -# define TIMER_WINDOWS -#else /* not WINDOWS */ -# ifdef HAVE_GETTIMEOFDAY -# define TIMER_GETTIMEOFDAY -# else -# define TIMER_TIME -# endif -#endif /* not WINDOWS */ +/* Determine the width of the terminal we're running on. If that's + not possible, return 0. */ -struct wget_timer { -#ifdef TIMER_GETTIMEOFDAY - long secs; - long usecs; -#endif +int +determine_screen_width (void) +{ + /* If there's a way to get the terminal size using POSIX + tcgetattr(), somebody please tell me. */ +#ifdef TIOCGWINSZ + int fd; + struct winsize wsz; -#ifdef TIMER_TIME - time_t secs; -#endif + if (opt.lfilename != NULL) + return 0; -#ifdef TIMER_WINDOWS - ULARGE_INTEGER wintime; -#endif -}; + fd = fileno (stderr); + if (ioctl (fd, TIOCGWINSZ, &wsz) < 0) + return 0; /* most likely ENOTTY */ -/* Allocate a timer. It is not legal to do anything with a freshly - allocated timer, except call wtimer_reset() or wtimer_delete(). */ + return wsz.ws_col; +#elif defined(WINDOWS) + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi)) + return 0; + return csbi.dwSize.X; +#else /* neither TIOCGWINSZ nor WINDOWS */ + return 0; +#endif /* neither TIOCGWINSZ nor WINDOWS */ +} + +/* Whether the rnd system (either rand or [dl]rand48) has been + seeded. */ +static int rnd_seeded; -struct wget_timer * -wtimer_allocate (void) +/* Return a random number between 0 and MAX-1, inclusive. + + If the system does not support lrand48 and MAX is greater than the + value of RAND_MAX+1 on the system, the returned value will be in + the range [0, RAND_MAX]. This may be fixed in a future release. + The random number generator is seeded automatically the first time + it is called. + + This uses lrand48 where available, rand elsewhere. DO NOT use it + for cryptography. It is only meant to be used in situations where + quality of the random numbers returned doesn't really matter. 
*/ + +int +random_number (int max) { - struct wget_timer *wt = - (struct wget_timer *)xmalloc (sizeof (struct wget_timer)); - return wt; +#ifdef HAVE_DRAND48 + if (!rnd_seeded) + { + srand48 ((long) time (NULL) ^ (long) getpid ()); + rnd_seeded = 1; + } + return lrand48 () % max; +#else /* not HAVE_DRAND48 */ + + double bounded; + int rnd; + if (!rnd_seeded) + { + srand ((unsigned) time (NULL) ^ (unsigned) getpid ()); + rnd_seeded = 1; + } + rnd = rand (); + + /* Like rand() % max, but uses the high-order bits for better + randomness on architectures where rand() is implemented using a + simple congruential generator. */ + + bounded = (double) max * rnd / (RAND_MAX + 1.0); + return (int) bounded; + +#endif /* not HAVE_DRAND48 */ } -/* Allocate a new timer and reset it. Return the new timer. */ +/* Return a random uniformly distributed floating point number in the + [0, 1) range. Uses drand48 where available, and a really lame + kludge elsewhere. */ -struct wget_timer * -wtimer_new (void) +double +random_float (void) { - struct wget_timer *wt = wtimer_allocate (); - wtimer_reset (wt); - return wt; +#ifdef HAVE_DRAND48 + if (!rnd_seeded) + { + srand48 ((long) time (NULL) ^ (long) getpid ()); + rnd_seeded = 1; + } + return drand48 (); +#else /* not HAVE_DRAND48 */ + return ( random_number (10000) / 10000.0 + + random_number (10000) / (10000.0 * 10000.0) + + random_number (10000) / (10000.0 * 10000.0 * 10000.0) + + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0)); +#endif /* not HAVE_DRAND48 */ } + +/* Implementation of run_with_timeout, a generic timeout-forcing + routine for systems with Unix-like signal handling. */ -/* Free the resources associated with the timer. Its further use is - prohibited. 
*/ +#ifdef USE_SIGNAL_TIMEOUT +# ifdef HAVE_SIGSETJMP +# define SETJMP(env) sigsetjmp (env, 1) -void -wtimer_delete (struct wget_timer *wt) +static sigjmp_buf run_with_timeout_env; + +static void +abort_run_with_timeout (int sig) { - xfree (wt); + assert (sig == SIGALRM); + siglongjmp (run_with_timeout_env, -1); } +# else /* not HAVE_SIGSETJMP */ +# define SETJMP(env) setjmp (env) -/* Reset timer WT. This establishes the starting point from which - wtimer_elapsed() will return the number of elapsed - milliseconds. It is allowed to reset a previously used timer. */ +static jmp_buf run_with_timeout_env; -void -wtimer_reset (struct wget_timer *wt) +static void +abort_run_with_timeout (int sig) { -#ifdef TIMER_GETTIMEOFDAY - struct timeval t; - gettimeofday (&t, NULL); - wt->secs = t.tv_sec; - wt->usecs = t.tv_usec; -#endif + assert (sig == SIGALRM); + /* We don't have siglongjmp to preserve the set of blocked signals; + if we longjumped out of the handler at this point, SIGALRM would + remain blocked. We must unblock it manually. */ + sigset_t set; + sigemptyset (&set); + sigaddset (&set, SIGALRM); + sigprocmask (SIG_BLOCK, &set, NULL); -#ifdef TIMER_TIME - wt->secs = time (NULL); -#endif + /* Now it's safe to longjump. */ + longjmp (run_with_timeout_env, -1); +} +# endif /* not HAVE_SIGSETJMP */ -#ifdef TIMER_WINDOWS - FILETIME ft; - SYSTEMTIME st; - GetSystemTime (&st); - SystemTimeToFileTime (&st, &ft); - wt->wintime.HighPart = ft.dwHighDateTime; - wt->wintime.LowPart = ft.dwLowDateTime; -#endif +/* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses + setitimer where available, alarm otherwise. + + TIMEOUT should be non-zero. If the timeout value is so small that + it would be rounded to zero, it is rounded to the least legal value + instead (1us for setitimer, 1s for alarm). That ensures that + SIGALRM will be delivered in all cases. */ + +static void +alarm_set (double timeout) +{ +#ifdef ITIMER_REAL + /* Use the modern itimer interface. 
*/ + struct itimerval itv; + xzero (itv); + itv.it_value.tv_sec = (long) timeout; + itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout); + if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0) + /* Ensure that we wait for at least the minimum interval. + Specifying zero would mean "wait forever". */ + itv.it_value.tv_usec = 1; + setitimer (ITIMER_REAL, &itv, NULL); +#else /* not ITIMER_REAL */ + /* Use the old alarm() interface. */ + int secs = (int) timeout; + if (secs == 0) + /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is + because alarm(0) means "never deliver the alarm", i.e. "wait + forever", which is not what someone who specifies a 0.5s + timeout would expect. */ + secs = 1; + alarm (secs); +#endif /* not ITIMER_REAL */ +} + +/* Cancel the alarm set with alarm_set. */ + +static void +alarm_cancel (void) +{ +#ifdef ITIMER_REAL + struct itimerval disable; + xzero (disable); + setitimer (ITIMER_REAL, &disable, NULL); +#else /* not ITIMER_REAL */ + alarm (0); +#endif /* not ITIMER_REAL */ } -/* Return the number of milliseconds elapsed since the timer was last - reset. It is allowed to call this function more than once to get - increasingly higher elapsed values. */ +/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT + seconds. Returns true if the function was interrupted with a + timeout, false otherwise. -long -wtimer_elapsed (struct wget_timer *wt) + This works by setting up SIGALRM to be delivered in TIMEOUT seconds + using setitimer() or alarm(). The timeout is enforced by + longjumping out of the SIGALRM handler. This has several + advantages compared to the traditional approach of relying on + signals causing system calls to exit with EINTR: + + * The callback function is *forcibly* interrupted after the + timeout expires, (almost) regardless of what it was doing and + whether it was in a syscall. For example, a calculation that + takes a long time is interrupted as reliably as an IO + operation. 
+ + * It works with both SYSV and BSD signals because it doesn't + depend on the default setting of SA_RESTART. + + * It doesn't require special handler setup beyond a simple call + to signal(). (It does use sigsetjmp/siglongjmp, but they're + optional.) + + The only downside is that, if FUN allocates internal resources that + are normally freed prior to exit from the functions, they will be + lost in case of timeout. */ + +bool +run_with_timeout (double timeout, void (*fun) (void *), void *arg) { -#ifdef TIMER_GETTIMEOFDAY - struct timeval t; - gettimeofday (&t, NULL); - return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000; -#endif + int saved_errno; -#ifdef TIMER_TIME - time_t now = time (NULL); - return 1000 * (now - wt->secs); -#endif + if (timeout == 0) + { + fun (arg); + return false; + } -#ifdef WINDOWS - FILETIME ft; - SYSTEMTIME st; - ULARGE_INTEGER uli; - GetSystemTime (&st); - SystemTimeToFileTime (&st, &ft); - uli.HighPart = ft.dwHighDateTime; - uli.LowPart = ft.dwLowDateTime; - return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000); -#endif + signal (SIGALRM, abort_run_with_timeout); + if (SETJMP (run_with_timeout_env) != 0) + { + /* Longjumped out of FUN with a timeout. */ + signal (SIGALRM, SIG_DFL); + return true; + } + alarm_set (timeout); + fun (arg); + + /* Preserve errno in case alarm() or signal() modifies it. */ + saved_errno = errno; + alarm_cancel (); + signal (SIGALRM, SIG_DFL); + errno = saved_errno; + + return false; } -/* Return the assessed granularity of the timer implementation. This - is important for certain code that tries to deal with "zero" time - intervals. */ +#else /* not USE_SIGNAL_TIMEOUT */ -long -wtimer_granularity (void) +#ifndef WINDOWS +/* A stub version of run_with_timeout that just calls FUN(ARG). Don't + define it under Windows, because Windows has its own version of + run_with_timeout that uses threads. 
*/ + +bool +run_with_timeout (double timeout, void (*fun) (void *), void *arg) { -#ifdef TIMER_GETTIMEOFDAY - /* Granularity of gettimeofday is hugely architecture-dependent. - However, it appears that on modern machines it is better than - 1ms. */ - return 1; -#endif + fun (arg); + return false; +} +#endif /* not WINDOWS */ +#endif /* not USE_SIGNAL_TIMEOUT */ + +#ifndef WINDOWS -#ifdef TIMER_TIME - /* This is clear. */ - return 1000; -#endif +/* Sleep the specified amount of seconds. On machines without + nanosleep(), this may sleep shorter if interrupted by signals. */ -#ifdef TIMER_WINDOWS - /* ? */ - return 1; +void +xsleep (double seconds) +{ +#ifdef HAVE_NANOSLEEP + /* nanosleep is the preferred interface because it offers high + accuracy and, more importantly, because it allows us to reliably + restart receiving a signal such as SIGWINCH. (There was an + actual Debian bug report about --limit-rate malfunctioning while + the terminal was being resized.) */ + struct timespec sleep, remaining; + sleep.tv_sec = (long) seconds; + sleep.tv_nsec = 1000000000 * (seconds - (long) seconds); + while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR) + /* If nanosleep has been interrupted by a signal, adjust the + sleeping period and return to sleep. */ + sleep = remaining; +#elif defined(HAVE_USLEEP) + /* If usleep is available, use it in preference to select. */ + if (seconds >= 1) + { + /* On some systems, usleep cannot handle values larger than + 1,000,000. If the period is larger than that, use sleep + first, then add usleep for subsecond accuracy. */ + sleep (seconds); + seconds -= (long) seconds; + } + usleep (seconds * 1000000); +#else /* fall back select */ + /* Note that, although Windows supports select, it can't be used to + implement sleeping because Winsock's select doesn't implement + timeout when it is passed NULL pointers for all fd sets. (But it + does under Cygwin, which implements Unix-compatible select.) 
*/ + struct timeval sleep; + sleep.tv_sec = (long) seconds; + sleep.tv_usec = 1000000 * (seconds - (long) seconds); + select (0, NULL, NULL, NULL, &sleep); + /* If select returns -1 and errno is EINTR, it means we were + interrupted by a signal. But without knowing how long we've + actually slept, we can't return to sleep. Using gettimeofday to + track sleeps is slow and unreliable due to clock skew. */ #endif } - -/* This should probably be at a better place, but it doesn't really - fit into html-parse.c. */ -/* The function returns the pointer to the malloc-ed quoted version of - string s. It will recognize and quote numeric and special graphic - entities, as per RFC1866: +#endif /* not WINDOWS */ - `&' -> `&' - `<' -> `<' - `>' -> `>' - `"' -> `"' - SP -> ` ' +/* Encode the octets in DATA of length LENGTH to base64 format, + storing the result to DEST. The output will be zero-terminated, + and must point to a writable buffer of at least + 1+BASE64_LENGTH(length) bytes. The function returns the length of + the resulting base64 data, not counting the terminating zero. - No other entities are recognized or replaced. */ -char * -html_quote_string (const char *s) -{ - const char *b = s; - char *p, *res; - int i; + This implementation does not emit newlines after 76 characters of + base64 data. */ - /* Pass through the string, and count the new size. */ - for (i = 0; *s; s++, i++) +size_t +base64_encode (const void *data, size_t length, char *dest) +{ + /* Conversion table. */ + static const char tbl[64] = { + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', + 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', + 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', + 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' + }; + /* Access bytes in DATA as unsigned char, otherwise the shifts below + don't work for data with MSB set. */ + const unsigned char *s = data; + /* Theoretical ANSI violation when length < 3. 
*/ + const unsigned char *end = (const unsigned char *) data + length - 2; + char *p = dest; + + /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ + for (; s < end; s += 3) { - if (*s == '&') - i += 4; /* `amp;' */ - else if (*s == '<' || *s == '>') - i += 3; /* `lt;' and `gt;' */ - else if (*s == '\"') - i += 5; /* `quot;' */ - else if (*s == ' ') - i += 4; /* #32; */ + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; + *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; + *p++ = tbl[s[2] & 0x3f]; } - res = (char *)xmalloc (i + 1); - s = b; - for (p = res; *s; s++) + + /* Pad the result if necessary... */ + switch (length % 3) { - switch (*s) - { - case '&': - *p++ = '&'; - *p++ = 'a'; - *p++ = 'm'; - *p++ = 'p'; - *p++ = ';'; - break; - case '<': case '>': - *p++ = '&'; - *p++ = (*s == '<' ? 'l' : 'g'); - *p++ = 't'; - *p++ = ';'; - break; - case '\"': - *p++ = '&'; - *p++ = 'q'; - *p++ = 'u'; - *p++ = 'o'; - *p++ = 't'; - *p++ = ';'; - break; - case ' ': - *p++ = '&'; - *p++ = '#'; - *p++ = '3'; - *p++ = '2'; - *p++ = ';'; - break; - default: - *p++ = *s; - } + case 1: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[(s[0] & 3) << 4]; + *p++ = '='; + *p++ = '='; + break; + case 2: + *p++ = tbl[s[0] >> 2]; + *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; + *p++ = tbl[((s[1] & 0xf) << 2)]; + *p++ = '='; + break; } + /* ...and zero-terminate it. */ *p = '\0'; - return res; + + return p - dest; } -/* Determine the width of the terminal we're running on. If that's - not possible, return 0. */ +/* Store in C the next non-whitespace character from the string, or \0 + when end of string is reached. */ +#define NEXT_CHAR(c, p) do { \ + c = (unsigned char) *p++; \ +} while (c_isspace (c)) -int -determine_screen_width (void) +#define IS_ASCII(c) (((c) & 0x80) == 0) + +/* Decode data from BASE64 (a null-terminated string) into memory + pointed to by DEST. 
DEST is assumed to be large enough to + accomodate the decoded data, which is guaranteed to be no more than + 3/4*strlen(base64). + + Since DEST is assumed to contain binary data, it is not + NUL-terminated. The function returns the length of the data + written to TO. -1 is returned in case of error caused by malformed + base64 input. + + This function originates from Free Recode. */ + +ssize_t +base64_decode (const char *base64, void *dest) { - /* If there's a way to get the terminal size using POSIX - tcgetattr(), somebody please tell me. */ -#ifndef TIOCGWINSZ - return 0; -#else /* TIOCGWINSZ */ - int fd; - struct winsize wsz; + /* Table of base64 values for first 128 characters. Note that this + assumes ASCII (but so does Wget in other places). */ + static const signed char base64_char_to_value[128] = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */ + -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */ + 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */ + -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */ + 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */ + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */ + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */ + 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */ + }; +#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c]) +#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=') + + const char *p = base64; + char *q = dest; - if (opt.lfilename != NULL) - return 0; + while (1) + { + unsigned char c; + unsigned long value; + + /* Process first byte of a quadruplet. 
*/ + NEXT_CHAR (c, p); + if (!c) + break; + if (c == '=' || !IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + value = BASE64_CHAR_TO_VALUE (c) << 18; + + /* Process second byte of a quadruplet. */ + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (c == '=' || !IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + value |= BASE64_CHAR_TO_VALUE (c) << 12; + *q++ = value >> 16; + + /* Process third byte of a quadruplet. */ + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (!IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + + if (c == '=') + { + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (c != '=') + return -1; /* padding `=' expected but not found */ + continue; + } + + value |= BASE64_CHAR_TO_VALUE (c) << 6; + *q++ = 0xff & value >> 8; + + /* Process fourth byte of a quadruplet. */ + NEXT_CHAR (c, p); + if (!c) + return -1; /* premature EOF while decoding base64 */ + if (c == '=') + continue; + if (!IS_BASE64 (c)) + return -1; /* illegal char while decoding base64 */ + + value |= BASE64_CHAR_TO_VALUE (c); + *q++ = 0xff & value; + } +#undef IS_BASE64 +#undef BASE64_CHAR_TO_VALUE - fd = fileno (stderr); - if (ioctl (fd, TIOCGWINSZ, &wsz) < 0) - return 0; /* most likely ENOTTY */ + return q - (char *) dest; +} - return wsz.ws_col; -#endif /* TIOCGWINSZ */ +#ifdef HAVE_LIBPCRE +/* Compiles the PCRE regex. */ +void * +compile_pcre_regex (const char *str) +{ + const char *errbuf; + int erroffset; + pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0); + if (! regex) + { + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + return false; + } + return regex; } +#endif + +/* Compiles the POSIX regex. 
*/ +void * +compile_posix_regex (const char *str) +{ + regex_t *regex = xmalloc (sizeof (regex_t)); + int errcode = regcomp ((regex_t *) regex, str, REG_EXTENDED | REG_NOSUB); + if (errcode != 0) + { + size_t errbuf_size = regerror (errcode, (regex_t *) regex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + regerror (errcode, (regex_t *) regex, errbuf, errbuf_size); + fprintf (stderr, _("Invalid regular expression %s, %s\n"), + quote (str), errbuf); + xfree (errbuf); + return NULL; + } -#if 0 -/* A debugging function for checking whether an MD5 library works. */ + return regex; +} -#include "gen-md5.h" +#ifdef HAVE_LIBPCRE +#define OVECCOUNT 30 +/* Matches a PCRE regex. */ +bool +match_pcre_regex (const void *regex, const char *str) +{ + size_t l = strlen (str); + int ovector[OVECCOUNT]; -char * -debug_test_md5 (char *buf) -{ - unsigned char raw[16]; - static char res[33]; - unsigned char *p1; - char *p2; - int cnt; - ALLOCA_MD5_CONTEXT (ctx); - - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)buf, strlen (buf), ctx); - gen_md5_finish (ctx, raw); - - p1 = raw; - p2 = res; - cnt = 16; - while (cnt--) + int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT); + if (rc == PCRE_ERROR_NOMATCH) + return false; + else if (rc < 0) { - *p2++ = XDIGIT_TO_xchar (*p1 >> 4); - *p2++ = XDIGIT_TO_xchar (*p1 & 0xf); - ++p1; + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + return false; } - *p2 = '\0'; - - return res; + else + return true; } +#undef OVECCOUNT #endif -#if 0 -/* Debugging and testing support for path_simplify. */ - -/* Debug: run path_simplify on PATH and return the result in a new - string. Useful for calling from the debugger. */ -static char * -ps (char *path) +/* Matches a POSIX regex. 
*/ +bool +match_posix_regex (const void *regex, const char *str) { - char *copy = xstrdup (path); - path_simplify (copy); - return copy; + int rc = regexec ((regex_t *) regex, str, 0, NULL, 0); + if (rc == REG_NOMATCH) + return false; + else if (rc == 0) + return true; + else + { + size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0); + char *errbuf = xmalloc (errbuf_size); + regerror (rc, opt.acceptregex, errbuf, errbuf_size); + logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), + quote (str), rc); + xfree (errbuf); + return false; + } } +#undef IS_ASCII +#undef NEXT_CHAR + +/* Simple merge sort for use by stable_sort. Implementation courtesy + Zeljko Vrba with additional debugging by Nenad Barbutov. */ + static void -run_test (char *test, char *expected_result, int expected_change) +mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to, + int (*cmpfun) (const void *, const void *)) { - char *test_copy = xstrdup (test); - int modified = path_simplify (test_copy); +#define ELT(array, pos) ((char *)(array) + (pos) * size) + if (from < to) + { + size_t i, j, k; + size_t mid = (to + from) / 2; + mergesort_internal (base, temp, size, from, mid, cmpfun); + mergesort_internal (base, temp, size, mid + 1, to, cmpfun); + i = from; + j = mid + 1; + for (k = from; (i <= mid) && (j <= to); k++) + if (cmpfun (ELT (base, i), ELT (base, j)) <= 0) + memcpy (ELT (temp, k), ELT (base, i++), size); + else + memcpy (ELT (temp, k), ELT (base, j++), size); + while (i <= mid) + memcpy (ELT (temp, k++), ELT (base, i++), size); + while (j <= to) + memcpy (ELT (temp, k++), ELT (base, j++), size); + for (k = from; k <= to; k++) + memcpy (ELT (base, k), ELT (temp, k), size); + } +#undef ELT +} - if (0 != strcmp (test_copy, expected_result)) +/* Stable sort with interface exactly like standard library's qsort. + Uses mergesort internally, allocating temporary storage with + alloca. 
*/ + +void +stable_sort (void *base, size_t nmemb, size_t size, + int (*cmpfun) (const void *, const void *)) +{ + if (size > 1) { - printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n", - test, expected_result, test_copy); + void *temp = alloca (nmemb * size * sizeof (void *)); + mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun); } - if (modified != expected_change) +} + +/* Print a decimal number. If it is equal to or larger than ten, the + number is rounded. Otherwise it is printed with one significant + digit without trailing zeros and with no more than three fractional + digits total. For example, 0.1 is printed as "0.1", 0.035 is + printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0". + + This is useful for displaying durations because it provides + order-of-magnitude information without unnecessary clutter -- + long-running downloads are shown without the fractional part, and + short ones still retain one significant digit. */ + +const char * +print_decimal (double number) +{ + static char buf[32]; + double n = number >= 0 ? number : -number; + + if (n >= 9.95) + /* Cut off at 9.95 because the below %.1f would round 9.96 to + "10.0" instead of "10". OTOH 9.94 will print as "9.9". */ + snprintf (buf, sizeof buf, "%.0f", number); + else if (n >= 0.95) + snprintf (buf, sizeof buf, "%.1f", number); + else if (n >= 0.001) + snprintf (buf, sizeof buf, "%.1g", number); + else if (n >= 0.0005) + /* round [0.0005, 0.001) to 0.001 */ + snprintf (buf, sizeof buf, "%.3f", number); + else + /* print numbers close to 0 as 0, not 0.000 */ + strcpy (buf, "0"); + + return buf; +} + +/* Get the maximum name length for the given path. */ +/* Return 0 if length is unknown. */ +long +get_max_length (const char *path, int length, int name) +{ + long ret; + char *p, *d; + + /* Make a copy of the path that we can modify. */ + p = path ? 
strdupdelim (path, path + length) : strdup (""); + + for (;;) { - if (expected_change == 1) - printf ("Expected no modification with path_simplify(\"%s\").\n", - test); + errno = 0; + /* For an empty path query the current directory. */ +#if HAVE_PATHCONF + ret = pathconf (*p ? p : ".", name); + if (!(ret < 0 && errno == ENOENT)) + break; +#else + ret = PATH_MAX; +#endif + + /* The path does not exist yet, but may be created. */ + /* Already at current or root directory, give up. */ + if (!*p || strcmp (p, "/") == 0) + break; + + /* Remove one directory level and try again. */ + d = strrchr (p, '/'); + if (d == p) + p[1] = '\0'; /* check root directory */ + else if (d) + *d = '\0'; /* remove last directory part */ else - printf ("Expected modification with path_simplify(\"%s\").\n", - test); + *p = '\0'; /* check current directory */ } - xfree (test_copy); + + xfree (p); + + if (ret < 0) + { + /* pathconf() has a message for us. */ + if (errno != 0) + perror ("pathconf"); + + /* If (errno == 0) then there is no max length. + Even on error return 0 so the caller can continue. 
*/ + return 0; + } + + return ret; } -static void -test_path_simplify (void) +#ifdef TESTING + +const char * +test_subdir_p(void) { - static struct { - char *test, *result; - int should_modify; - } tests[] = { - { "", "", 0 }, - { ".", "", 1 }, - { "..", "", 1 }, - { "foo", "foo", 0 }, - { "foo/bar", "foo/bar", 0 }, - { "foo///bar", "foo/bar", 1 }, - { "foo/.", "foo/", 1 }, - { "foo/./", "foo/", 1 }, - { "foo./", "foo./", 0 }, - { "foo/../bar", "bar", 1 }, - { "foo/../bar/", "bar/", 1 }, - { "foo/bar/..", "foo/", 1 }, - { "foo/bar/../x", "foo/x", 1 }, - { "foo/bar/../x/", "foo/x/", 1 }, - { "foo/..", "", 1 }, - { "foo/../..", "", 1 }, - { "a/b/../../c", "c", 1 }, - { "./a/../b", "b", 1 } + static const struct { + const char *d1; + const char *d2; + bool result; + } test_array[] = { + { "/somedir", "/somedir", true }, + { "/somedir", "/somedir/d2", true }, + { "/somedir/d1", "/somedir", false }, }; - int i; + unsigned i; - for (i = 0; i < ARRAY_SIZE (tests); i++) + for (i = 0; i < countof(test_array); ++i) { - char *test = tests[i].test; - char *expected_result = tests[i].result; - int expected_change = tests[i].should_modify; - run_test (test, expected_result, expected_change); - } + bool res = subdir_p (test_array[i].d1, test_array[i].d2); - /* Now run all the tests with a leading slash before the test case, - to prove that the slash is being preserved. 
*/ - for (i = 0; i < ARRAY_SIZE (tests); i++) - { - char *test, *expected_result; - int expected_change = tests[i].should_modify; + mu_assert ("test_subdir_p: wrong result", + res == test_array[i].result); + } - test = xmalloc (1 + strlen (tests[i].test) + 1); - sprintf (test, "/%s", tests[i].test); + return NULL; +} - expected_result = xmalloc (1 + strlen (tests[i].result) + 1); - sprintf (expected_result, "/%s", tests[i].result); +const char * +test_dir_matches_p(void) +{ + static struct { + const char *dirlist[3]; + const char *dir; + bool result; + } test_array[] = { + { { "/somedir", "/someotherdir", NULL }, "somedir", true }, + { { "/somedir", "/someotherdir", NULL }, "anotherdir", false }, + { { "/somedir", "/*otherdir", NULL }, "anotherdir", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true }, + { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true }, + { { "/somedir/d1", "/someotherdir", NULL }, "d1", false }, + { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true }, + { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true }, + { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true }, + { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false }, + { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true }, + { { "/dir with spaces", NULL, NULL }, "dir with spaces", true }, + { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true }, + { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false }, + { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false }, + }; + unsigned i; - run_test (test, expected_result, expected_change); + for (i = 0; i < countof(test_array); ++i) + { + bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); - xfree (test); - xfree (expected_result); + mu_assert ("test_dir_matches_p: wrong result", + res == test_array[i].result); } + + return NULL; } -#endif + +#endif /* TESTING */ +