X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Futils.c;h=a427e7367706d1c043844e749f85f26c9fe2059a;hb=5f0a2b3f0846dd4c2f72fc62e7171200d1fd6e06;hp=31aab9c13efb6d875027864983da1d8b6046cbc6;hpb=31d6616c483359af431f4c33c3c5b237cd8d4426;p=wget diff --git a/src/utils.c b/src/utils.c index 31aab9c1..a427e736 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,21 +1,31 @@ -/* Various functions of utilitarian nature. - Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. +/* Various utility functions. + Copyright (C) 2003 Free Software Foundation, Inc. -This file is part of Wget. +This file is part of GNU Wget. -This program is free software; you can redistribute it and/or modify +GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. -This program is distributed in the hope that it will be useful, +GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +along with Wget; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". 
If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include @@ -26,11 +36,13 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #else /* not HAVE_STRING_H */ # include #endif /* not HAVE_STRING_H */ -#include #include #ifdef HAVE_UNISTD_H # include #endif +#ifdef HAVE_MMAP +# include +#endif #ifdef HAVE_PWD_H # include #endif @@ -45,82 +57,75 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef NeXT # include /* for access() */ #endif +#include +#include + +/* For TIOCGWINSZ and friends: */ +#ifdef HAVE_SYS_IOCTL_H +# include +#endif +#ifdef HAVE_TERMIOS_H +# include +#endif + +/* Needed for run_with_timeout. */ +#undef USE_SIGNAL_TIMEOUT +#ifdef HAVE_SIGNAL_H +# include +#endif +#ifdef HAVE_SETJMP_H +# include +#endif + +#ifndef HAVE_SIGSETJMP +/* If sigsetjmp is a macro, configure won't pick it up. */ +# ifdef sigsetjmp +# define HAVE_SIGSETJMP +# endif +#endif + +#ifdef HAVE_SIGNAL +# ifdef HAVE_SIGSETJMP +# define USE_SIGNAL_TIMEOUT +# endif +# ifdef HAVE_SIGBLOCK +# define USE_SIGNAL_TIMEOUT +# endif +#endif #include "wget.h" #include "utils.h" -#include "fnmatch.h" +#include "hash.h" #ifndef errno extern int errno; #endif +/* Utility function: like xstrdup(), but also lowercases S. */ -/* Croak the fatal memory error and bail out with non-zero exit - status. */ -static void -memfatal (const char *s) +char * +xstrdup_lower (const char *s) { - /* HACK: expose save_log_p from log.c, so we can turn it off in - order to prevent saving the log. Saving the log is dangerous - because logprintf() and logputs() can call malloc(), so this - could infloop. When logging is turned off, infloop can no longer - happen. 
*/ - extern int save_log_p; - - save_log_p = 0; - logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, s); - exit (1); + char *copy = xstrdup (s); + char *p = copy; + for (; *p; p++) + *p = TOLOWER (*p); + return copy; } -/* xmalloc, xrealloc and xstrdup exit the program if there is not - enough memory. xstrdup also implements strdup on systems that do - not have it. */ -void * -xmalloc (size_t size) -{ - void *res; +/* Return a count of how many times CHR occurs in STRING. */ - res = malloc (size); - if (!res) - memfatal ("malloc"); - return res; -} - -void * -xrealloc (void *obj, size_t size) +int +count_char (const char *string, char chr) { - void *res; - - /* Not all Un*xes have the feature of realloc() that calling it with - a NULL-pointer is the same as malloc(), but it is easy to - simulate. */ - if (obj) - res = realloc (obj, size); - else - res = malloc (size); - if (!res) - memfatal ("realloc"); - return res; + const char *p; + int count = 0; + for (p = string; *p; p++) + if (*p == chr) + ++count; + return count; } -char * -xstrdup (const char *s) -{ -#ifndef HAVE_STRDUP - int l = strlen (s); - char *s1 = malloc (l + 1); - if (!s1) - memfatal ("strdup"); - memcpy (s1, s, l + 1); - return s1; -#else /* HAVE_STRDUP */ - char *s1 = strdup (s); - if (!s1) - memfatal ("strdup"); - return s1; -#endif /* HAVE_STRDUP */ -} - /* Copy the string formed by two pointers (one on the beginning, other on the char after the last char) to a new, malloc-ed location. 0-terminate it. */ @@ -170,79 +175,62 @@ sepstring (const char *s) } /* Return pointer to a static char[] buffer in which zero-terminated - string-representation of TM (in form hh:mm:ss) is printed. It is - shamelessly non-reentrant, but it doesn't matter, really. + string-representation of TM (in form hh:mm:ss) is printed. + + If TM is non-NULL, the current time-in-seconds will be stored + there. 
+ + (#### This is misleading: one would expect TM would be used instead + of the current time in that case. This design was probably + influenced by the design time(2), and should be changed at some + points. No callers use non-NULL TM anyway.) */ - If TM is non-NULL, the time_t of the current time will be stored - there. */ char * time_str (time_t *tm) { - static char tms[15]; + static char output[15]; struct tm *ptm; - time_t tim; - - *tms = '\0'; - tim = time (tm); - if (tim == -1) - return tms; - ptm = localtime (&tim); - sprintf (tms, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec); - return tms; -} - -/* Returns an error message for ERRNUM. #### This requires more work. - This function, as well as the whole error system, is very - ill-conceived. */ -const char * -uerrmsg (uerr_t errnum) -{ - switch (errnum) - { - case URLUNKNOWN: - return _("Unknown/unsupported protocol"); - break; - case URLBADPORT: - return _("Invalid port specification"); - break; - case URLBADHOST: - return _("Invalid host name"); - break; - default: - abort (); - /* $@#@#$ compiler. */ - return NULL; + time_t secs = time (tm); + + if (secs == -1) + { + /* In case of error, return the empty string. Maybe we should + just abort if this happens? */ + *output = '\0'; + return output; } + ptm = localtime (&secs); + sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec); + return output; } - -/* The Windows versions of the following two functions are defined in - mswindows.c. */ -/* A cuserid() immitation using getpwuid(), to avoid hassling with - utmp. Besides, not all systems have cuesrid(). Under Windows, it - is defined in mswindows.c. +/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ - If WHERE is non-NULL, the username will be stored there. - Otherwise, it will be returned as a static buffer (as returned by - getpwuid()). In the latter case, the buffer should be copied - before calling getpwuid() or pwd_cuserid() again. 
*/ -#ifndef WINDOWS char * -pwd_cuserid (char *where) +datetime_str (time_t *tm) { - struct passwd *pwd; + static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */ + struct tm *ptm; + time_t secs = time (tm); - if (!(pwd = getpwuid (getuid ())) || !pwd->pw_name) - return NULL; - if (where) + if (secs == -1) { - strcpy (where, pwd->pw_name); - return where; + /* In case of error, return the empty string. Maybe we should + just abort if this happens? */ + *output = '\0'; + return output; } - else - return pwd->pw_name; + ptm = localtime (&secs); + sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d", + ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday, + ptm->tm_hour, ptm->tm_min, ptm->tm_sec); + return output; } + +/* The Windows versions of the following two functions are defined in + mswindows.c. */ +#ifndef WINDOWS void fork_to_background (void) { @@ -252,7 +240,7 @@ fork_to_background (void) if (!opt.lfilename) { - opt.lfilename = unique_name (DEFAULT_LOGFILE); + opt.lfilename = unique_name (DEFAULT_LOGFILE, 0); changedp = 1; } pid = fork (); @@ -265,137 +253,19 @@ fork_to_background (void) else if (pid != 0) { /* parent, no error */ - printf (_("Continuing in background.\n")); + printf (_("Continuing in background, pid %d.\n"), (int)pid); if (changedp) printf (_("Output will be written to `%s'.\n"), opt.lfilename); - exit (0); + exit (0); /* #### should we use _exit()? */ } - /* child: keep running */ -} -#endif /* not WINDOWS */ - -/* Canonicalize PATH, and return a new path. The new path differs from PATH - in that: - Multple `/'s are collapsed to a single `/'. - Leading `./'s and trailing `/.'s are removed. - Trailing `/'s are removed. - Non-leading `../'s and trailing `..'s are handled by removing - portions of the path. - - E.g. "a/b/c/./../d/.." will yield "a/b". This function originates - from GNU Bash. - - Changes for Wget: - Always use '/' as stub_char. - Don't check for local things using canon_stat. - Change the original string instead of strdup-ing. 
- React correctly when beginning with `./' and `../'. */ -void -path_simplify (char *path) -{ - register int i, start, ddot; - char stub_char; - - if (!*path) - return; - - /*stub_char = (*path == '/') ? '/' : '.';*/ - stub_char = '/'; - /* Addition: Remove all `./'-s preceding the string. If `../'-s - precede, put `/' in front and remove them too. */ - i = 0; - ddot = 0; - while (1) - { - if (path[i] == '.' && path[i + 1] == '/') - i += 2; - else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/') - { - i += 3; - ddot = 1; - } - else - break; - } - if (i) - strcpy (path, path + i - ddot); - - /* Replace single `.' or `..' with `/'. */ - if ((path[0] == '.' && path[1] == '\0') - || (path[0] == '.' && path[1] == '.' && path[2] == '\0')) - { - path[0] = stub_char; - path[1] = '\0'; - return; - } - /* Walk along PATH looking for things to compact. */ - i = 0; - while (1) - { - if (!path[i]) - break; - - while (path[i] && path[i] != '/') - i++; - - start = i++; - - /* If we didn't find any slashes, then there is nothing left to do. */ - if (!path[start]) - break; - - /* Handle multiple `/'s in a row. */ - while (path[i] == '/') - i++; - - if ((start + 1) != i) - { - strcpy (path + start + 1, path + i); - i = start + 1; - } - - /* Check for trailing `/'. */ - if (start && !path[i]) - { - zero_last: - path[--i] = '\0'; - break; - } - - /* Check for `../', `./' or trailing `.' by itself. */ - if (path[i] == '.') - { - /* Handle trailing `.' by itself. */ - if (!path[i + 1]) - goto zero_last; - - /* Handle `./'. */ - if (path[i + 1] == '/') - { - strcpy (path + i, path + i + 1); - i = (start < 0) ? 0 : start; - continue; - } - - /* Handle `../' or trailing `..' by itself. */ - if (path[i + 1] == '.' && - (path[i + 2] == '/' || !path[i + 2])) - { - while (--start > -1 && path[start] != '/'); - strcpy (path + start + 1, path + i + 2); - i = (start < 0) ? 0 : start; - continue; - } - } /* path == '.' 
*/ - } /* while */ - - if (!*path) - { - *path = stub_char; - path[1] = '\0'; - } + /* child: give up the privileges and keep running. */ + setsid (); + freopen ("/dev/null", "r", stdin); + freopen ("/dev/null", "w", stdout); + freopen ("/dev/null", "w", stderr); } +#endif /* not WINDOWS */ /* "Touch" FILE, i.e. make its atime and mtime equal to the time specified with TM. */ @@ -411,7 +281,7 @@ touch (const char *file, time_t tm) #endif if (utime (file, ×) == -1) - logprintf (LOG_NOTQUIET, "utime: %s\n", strerror (errno)); + logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); } /* Checks if FILE is a symbolic link, and removes it if it is. Does @@ -464,39 +334,73 @@ file_non_directory_p (const char *path) return S_ISDIR (buf.st_mode) ? 0 : 1; } -/* Return a unique filename, given a prefix and count */ +/* Return the size of file named by FILENAME, or -1 if it cannot be + opened or seeked into. */ +long +file_size (const char *filename) +{ + long size; + /* We use fseek rather than stat to determine the file size because + that way we can also verify whether the file is readable. + Inspired by the POST patch by Arnaud Wylie. */ + FILE *fp = fopen (filename, "rb"); + if (!fp) + return -1; + fseek (fp, 0, SEEK_END); + size = ftell (fp); + fclose (fp); + return size; +} + +/* stat file names named PREFIX.1, PREFIX.2, etc., until one that + doesn't exist is found. Return a freshly allocated copy of the + unused file name. 
*/ + static char * -unique_name_1 (const char *fileprefix, int count) +unique_name_1 (const char *prefix) { - char *filename; + int count = 1; + int plen = strlen (prefix); + char *template = (char *)alloca (plen + 1 + 24); + char *template_tail = template + plen; - if (count) - { - filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2); - sprintf (filename, "%s.%d", fileprefix, count); - } - else - filename = xstrdup (fileprefix); + memcpy (template, prefix, plen); + *template_tail++ = '.'; - if (!file_exists_p (filename)) - return filename; - else - { - free (filename); - return NULL; - } + do + number_to_string (template_tail, count++); + while (file_exists_p (template)); + + return xstrdup (template); } -/* Return a unique file name, based on PREFIX. */ +/* Return a unique file name, based on FILE. + + More precisely, if FILE doesn't exist, it is returned unmodified. + If not, FILE.1 is tried, then FILE.2, etc. The first FILE. + file name that doesn't exist is returned. + + The resulting file is not created, only verified that it didn't + exist at the point in time when the function was called. + Therefore, where security matters, don't rely that the file created + by this function exists until you open it with O_EXCL or + something. + + If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated + string. Otherwise, it may return FILE if the file doesn't exist + (and therefore doesn't need changing). */ + char * -unique_name (const char *prefix) +unique_name (const char *file, int allow_passthrough) { - char *file = NULL; - int count = 0; - - while (!file) - file = unique_name_1 (prefix, count++); - return file; + /* If the FILE itself doesn't exist, return it without + modification. */ + if (!file_exists_p (file)) + return allow_passthrough ? (char *)file : xstrdup (file); + + /* Otherwise, find a numeric suffix that results in unused file name + and return it. */ + return unique_name_1 (file); } /* Create DIRECTORY. 
If some of the pathname components of DIRECTORY @@ -510,6 +414,7 @@ make_directory (const char *directory) { int quit = 0; int i; + int ret = 0; char *dir; /* Make a copy of dir, to be able to write to it. Otherwise, the @@ -525,18 +430,45 @@ make_directory (const char *directory) if (!dir[i]) quit = 1; dir[i] = '\0'; - /* Check whether the directory already exists. */ + /* Check whether the directory already exists. Allow creation of + of intermediate directories to fail, as the initial path components + are not necessarily directories! */ if (!file_exists_p (dir)) - { - if (mkdir (dir, 0777) < 0) - return -1; - } + ret = mkdir (dir, 0777); + else + ret = 0; if (quit) break; else dir[i] = '/'; } - return 0; + return ret; +} + +/* Merge BASE with FILE. BASE can be a directory or a file name, FILE + should be a file name. + + file_merge("/foo/bar", "baz") => "/foo/baz" + file_merge("/foo/bar/", "baz") => "/foo/bar/baz" + file_merge("foo", "bar") => "bar" + + In other words, it's a simpler and gentler version of uri_merge_1. */ + +char * +file_merge (const char *base, const char *file) +{ + char *result; + const char *cut = (const char *)strrchr (base, '/'); + + if (!cut) + return xstrdup (file); + + result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1); + memcpy (result, base, cut - base); + result[cut - base] = '/'; + strcpy (result + (cut - base) + 1, file); + + return result; } static int in_acclist PARAMS ((const char *const *, const char *, int)); @@ -621,20 +553,37 @@ accdir (const char *directory, enum accd flags) return 1; } -/* Match the end of STRING against PATTERN. For instance: +/* Return non-zero if STRING ends with TAIL. 
For instance: - match_backwards ("abc", "bc") -> 1 - match_backwards ("abc", "ab") -> 0 - match_backwards ("abc", "abc") -> 1 */ -static int -match_backwards (const char *string, const char *pattern) + match_tail ("abc", "bc", 0) -> 1 + match_tail ("abc", "ab", 0) -> 0 + match_tail ("abc", "abc", 0) -> 1 + + If FOLD_CASE_P is non-zero, the comparison will be + case-insensitive. */ + +int +match_tail (const char *string, const char *tail, int fold_case_p) { int i, j; - for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--) - if (string[i] != pattern[j]) - break; - /* If the pattern was exhausted, the match was succesful. */ + /* We want this to be fast, so we code two loops, one with + case-folding, one without. */ + + if (!fold_case_p) + { + for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) + if (string[i] != tail[j]) + break; + } + else + { + for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) + if (TOLOWER (string[i]) != TOLOWER (tail[j])) + break; + } + + /* If the tail was exhausted, the match was succesful. */ if (j == -1) return 1; else @@ -642,7 +591,7 @@ match_backwards (const char *string, const char *pattern) } /* Checks whether string S matches each element of ACCEPTS. A list - element are matched either with fnmatch() or match_backwards(), + element are matched either with fnmatch() or match_tail(), according to whether the element contains wildcards or not. If the BACKWARD is 0, don't do backward comparison -- just compare @@ -663,7 +612,7 @@ in_acclist (const char *const *accepts, const char *s, int backward) { if (backward) { - if (match_backwards (s, *accepts)) + if (match_tail (s, *accepts, 0)) return 1; } else @@ -676,7 +625,7 @@ in_acclist (const char *const *accepts, const char *s, int backward) return 0; } -/* Return the malloc-ed suffix of STR. For instance: +/* Return the location of STR's suffix (file extension). 
Examples: suffix ("foo.bar") -> "bar" suffix ("foo.bar.baz") -> "baz" suffix ("/foo/bar") -> NULL @@ -686,71 +635,244 @@ suffix (const char *str) { int i; - for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--); + for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--) + ; + if (str[i++] == '.') - return xstrdup (str + i); + return (char *)str + i; else return NULL; } -/* Read a line from FP. The function reallocs the storage as needed - to accomodate for any length of the line. Reallocs are done - storage exponentially, doubling the storage after each overflow to - minimize the number of calls to realloc(). +/* Return non-zero if S contains globbing wildcards (`*', `?', `[' or + `]'). */ + +int +has_wildcards_p (const char *s) +{ + for (; *s; s++) + if (*s == '*' || *s == '?' || *s == '[' || *s == ']') + return 1; + return 0; +} + +/* Return non-zero if FNAME ends with a typical HTML suffix. The + following (case-insensitive) suffixes are presumed to be HTML files: + + html + htm + ?html (`?' matches one character) + + #### CAVEAT. This is not necessarily a good indication that FNAME + refers to a file that contains HTML! */ +int +has_html_suffix_p (const char *fname) +{ + char *suf; + + if ((suf = suffix (fname)) == NULL) + return 0; + if (!strcasecmp (suf, "html")) + return 1; + if (!strcasecmp (suf, "htm")) + return 1; + if (suf[0] && !strcasecmp (suf + 1, "html")) + return 1; + return 0; +} + +/* Read a line from FP and return the pointer to freshly allocated + storage. The storage space is obtained through malloc() and should + be freed with free() when it is no longer needed. + + The length of the line is not limited, except by available memory. + The newline character at the end of line is retained. The line is + terminated with a zero character. + + After end-of-file is encountered without anything being read, NULL + is returned. NULL is also returned on error. To distinguish + between these two cases, use the stdio function ferror(). 
*/ - It is not an exemplary of correctness, since it kills off the - newline (and no, there is no way to know if there was a newline at - EOF). */ char * read_whole_line (FILE *fp) { - char *line; - int i, bufsize, c; + int length = 0; + int bufsize = 82; + char *line = (char *)xmalloc (bufsize); - i = 0; - bufsize = 40; - line = (char *)xmalloc (bufsize); - /* Construct the line. */ - while ((c = getc (fp)) != EOF && c != '\n') + while (fgets (line + length, bufsize - length, fp)) { - if (i > bufsize - 1) - line = (char *)xrealloc (line, (bufsize <<= 1)); - line[i++] = c; + length += strlen (line + length); + if (length == 0) + /* Possible for example when reading from a binary file where + a line begins with \0. */ + continue; + + if (line[length - 1] == '\n') + break; + + /* fgets() guarantees to read the whole line, or to use up the + space we've given it. We can double the buffer + unconditionally. */ + bufsize <<= 1; + line = xrealloc (line, bufsize); } - if (c == EOF && !i) + if (length == 0 || ferror (fp)) { - free (line); + xfree (line); return NULL; } - /* Check for overflow at zero-termination (no need to double the - buffer in this case. */ - if (i == bufsize) - line = (char *)xrealloc (line, i + 1); - line[i] = '\0'; + if (length + 1 < bufsize) + /* Relieve the memory from our exponential greediness. We say + `length + 1' because the terminating \0 is not included in + LENGTH. We don't need to zero-terminate the string ourselves, + though, because fgets() does that. */ + line = xrealloc (line, length + 1); return line; } + +/* Read FILE into memory. A pointer to `struct file_memory' are + returned; use struct element `content' to access file contents, and + the element `length' to know the file length. `content' is *not* + zero-terminated, and you should *not* read or write beyond the [0, + length) range of characters. -/* Load file pointed to by FP to memory and return the malloc-ed - buffer with the contents. 
*NREAD will contain the number of read - bytes. The file is loaded in chunks, allocated exponentially, - starting with FILE_BUFFER_SIZE bytes. */ -void -load_file (FILE *fp, char **buf, long *nread) -{ - long bufsize; + After you are done with the file contents, call read_file_free to + release the memory. + + Depending on the operating system and the type of file that is + being read, read_file() either mmap's the file into memory, or + reads the file into the core using read(). - bufsize = 512; - *nread = 0; - *buf = NULL; - while (!feof (fp) && !ferror (fp)) + If file is named "-", fileno(stdin) is used for reading instead. + If you want to read from a real file named "-", use "./-" instead. */ + +struct file_memory * +read_file (const char *file) +{ + int fd; + struct file_memory *fm; + long size; + int inhibit_close = 0; + + /* Some magic in the finest tradition of Perl and its kin: if FILE + is "-", just use stdin. */ + if (HYPHENP (file)) { - *buf = (char *)xrealloc (*buf, bufsize + *nread); - *nread += fread (*buf + *nread, sizeof (char), bufsize, fp); - bufsize <<= 1; + fd = fileno (stdin); + inhibit_close = 1; + /* Note that we don't inhibit mmap() in this case. If stdin is + redirected from a regular file, mmap() will still work. */ + } + else + fd = open (file, O_RDONLY); + if (fd < 0) + return NULL; + fm = xnew (struct file_memory); + +#ifdef HAVE_MMAP + { + struct stat buf; + if (fstat (fd, &buf) < 0) + goto mmap_lose; + fm->length = buf.st_size; + /* NOTE: As far as I know, the callers of this function never + modify the file text. Relying on this would enable us to + specify PROT_READ and MAP_SHARED for a marginal gain in + efficiency, but at some cost to generality. 
*/ + fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd, 0); + if (fm->content == (char *)MAP_FAILED) + goto mmap_lose; + if (!inhibit_close) + close (fd); + + fm->mmap_p = 1; + return fm; + } + + mmap_lose: + /* The most common reason why mmap() fails is that FD does not point + to a plain file. However, it's also possible that mmap() doesn't + work for a particular type of file. Therefore, whenever mmap() + fails, we just fall back to the regular method. */ +#endif /* HAVE_MMAP */ + + fm->length = 0; + size = 512; /* number of bytes fm->contents can + hold at any given time. */ + fm->content = xmalloc (size); + while (1) + { + long nread; + if (fm->length > size / 2) + { + /* #### I'm not sure whether the whole exponential-growth + thing makes sense with kernel read. On Linux at least, + read() refuses to read more than 4K from a file at a + single chunk anyway. But other Unixes might optimize it + better, and it doesn't *hurt* anything, so I'm leaving + it. */ + + /* Normally, we grow SIZE exponentially to make the number + of calls to read() and realloc() logarithmic in relation + to file size. However, read() can read an amount of data + smaller than requested, and it would be unreasonable to + double SIZE every time *something* was read. Therefore, + we double SIZE only when the length exceeds half of the + entire allocated size. */ + size <<= 1; + fm->content = xrealloc (fm->content, size); + } + nread = read (fd, fm->content + fm->length, size - fm->length); + if (nread > 0) + /* Successful read. */ + fm->length += nread; + else if (nread < 0) + /* Error. */ + goto lose; + else + /* EOF */ + break; } - /* #### No indication of encountered error?? */ + if (!inhibit_close) + close (fd); + if (size > fm->length && fm->length != 0) + /* Due to exponential growth of fm->content, the allocated region + might be much larger than what is actually needed. 
*/ + fm->content = xrealloc (fm->content, fm->length); + fm->mmap_p = 0; + return fm; + + lose: + if (!inhibit_close) + close (fd); + xfree (fm->content); + xfree (fm); + return NULL; } +/* Release the resources held by FM. Specifically, this calls + munmap() or xfree() on fm->content, depending whether mmap or + malloc/read were used to read in the file. It also frees the + memory needed to hold the FM structure itself. */ + +void +read_file_free (struct file_memory *fm) +{ +#ifdef HAVE_MMAP + if (fm->mmap_p) + { + munmap (fm->content, fm->length); + } + else +#endif + { + xfree (fm->content); + } + xfree (fm); +} + /* Free the pointers in a NULL-terminated vector of pointers, then free the pointer itself. */ void @@ -760,8 +882,8 @@ free_vec (char **vec) { char **p = vec; while (*p) - free (*p++); - free (vec); + xfree (*p++); + xfree (vec); } } @@ -780,7 +902,7 @@ merge_vecs (char **v1, char **v2) if (!*v2) { /* To avoid j == 0 */ - free (v2); + xfree (v2); return v1; } /* Count v1. */ @@ -790,128 +912,172 @@ merge_vecs (char **v1, char **v2) /* Reallocate v1. */ v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **)); memcpy (v1 + i, v2, (j + 1) * sizeof (char *)); - free (v2); + xfree (v2); return v1; } -/* A set of simple-minded routines to store and search for strings in - a linked list. You may add a string to the slist, and peek whether - it's still in there at any time later. */ +/* A set of simple-minded routines to store strings in a linked list. + This used to also be used for searching, but now we have hash + tables for that. */ + +/* It's a shame that these simple things like linked lists and hash + tables (see hash.c) need to be implemented over and over again. It + would be nice to be able to use the routines from glib -- see + www.gtk.org for details. 
However, that would make Wget depend on + glib, and I want to avoid dependencies to external libraries for + reasons of convenience and portability (I suspect Wget is more + portable than anything ever written for Gnome). */ + +/* Append an element to the list. If the list has a huge number of + elements, this can get slow because it has to find the list's + ending. If you think you have to call slist_append in a loop, + think about calling slist_prepend() followed by slist_nreverse(). */ -/* Add an element to the list. If flags is NOSORT, the list will not - be sorted. */ slist * -add_slist (slist *l, const char *s, int flags) +slist_append (slist *l, const char *s) { - slist *t, *old, *beg; - int cmp; + slist *newel = xnew (slist); + slist *beg = l; - if (flags & NOSORT) - { - if (!l) - { - t = (slist *)xmalloc (sizeof (slist)); - t->string = xstrdup (s); - t->next = NULL; - return t; - } - beg = l; - /* Find the last element. */ - while (l->next) - l = l->next; - t = (slist *)xmalloc (sizeof (slist)); - l->next = t; - t->string = xstrdup (s); - t->next = NULL; - return beg; - } - /* Empty list or changing the first element. */ - if (!l || (cmp = strcmp (l->string, s)) > 0) - { - t = (slist *)xmalloc (sizeof (slist)); - t->string = xstrdup (s); - t->next = l; - return t; - } - - beg = l; - if (cmp == 0) - return beg; - - /* Second two one-before-the-last element. */ + newel->string = xstrdup (s); + newel->next = NULL; + + if (!l) + return newel; + /* Find the last element. */ while (l->next) - { - old = l; - l = l->next; - cmp = strcmp (s, l->string); - if (cmp == 0) /* no repeating in the list */ - return beg; - else if (cmp > 0) - continue; - /* If the next list element is greater than s, put s between the - current and the next list element. 
*/ - t = (slist *)xmalloc (sizeof (slist)); - old->next = t; - t->next = l; - t->string = xstrdup (s); - return beg; - } - t = (slist *)xmalloc (sizeof (slist)); - t->string = xstrdup (s); - /* Insert the new element after the last element. */ - l->next = t; - t->next = NULL; + l = l->next; + l->next = newel; return beg; } -/* Is there a specific entry in the list? */ -int -in_slist (slist *l, const char *s) +/* Prepend S to the list. Unlike slist_append(), this is O(1). */ + +slist * +slist_prepend (slist *l, const char *s) { - int cmp; + slist *newel = xnew (slist); + newel->string = xstrdup (s); + newel->next = l; + return newel; +} +/* Destructively reverse L. */ + +slist * +slist_nreverse (slist *l) +{ + slist *prev = NULL; while (l) { - cmp = strcmp (l->string, s); - if (cmp == 0) - return 1; - else if (cmp > 0) /* the list is ordered! */ - return 0; - l = l->next; + slist *next = l->next; + l->next = prev; + prev = l; + l = next; } + return prev; +} + +/* Is there a specific entry in the list? */ +int +slist_contains (slist *l, const char *s) +{ + for (; l; l = l->next) + if (!strcmp (l->string, s)) + return 1; return 0; } /* Free the whole slist. */ void -free_slist (slist *l) +slist_free (slist *l) { - slist *n; - while (l) { - n = l->next; - free (l->string); - free (l); + slist *n = l->next; + xfree (l->string); + xfree (l); l = n; } } + +/* Sometimes it's useful to create "sets" of strings, i.e. special + hash tables where you want to store strings as keys and merely + query for their existence. Here is a set of utility routines that + makes that transparent. */ -/* Legible -- return a static pointer to the legibly printed long. */ -char * -legible (long l) +void +string_set_add (struct hash_table *ht, const char *s) +{ + /* First check whether the set element already exists. If it does, + do nothing so that we don't have to free() the old element and + then strdup() a new one. */ + if (hash_table_contains (ht, s)) + return; + + /* We use "1" as value. 
It provides us a useful and clear arbitrary + value, and it consumes no memory -- the pointers to the same + string "1" will be shared by all the key-value pairs in all `set' + hash tables. */ + hash_table_put (ht, xstrdup (s), "1"); +} + +/* Synonym for hash_table_contains... */ + +int +string_set_contains (struct hash_table *ht, const char *s) { - static char outbuf[20]; - char inbuf[20]; + return hash_table_contains (ht, s); +} + +static int +string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored) +{ + xfree (key); + return 0; +} + +void +string_set_free (struct hash_table *ht) +{ + hash_table_map (ht, string_set_free_mapper, NULL); + hash_table_destroy (ht); +} + +static int +free_keys_and_values_mapper (void *key, void *value, void *arg_ignored) +{ + xfree (key); + xfree (value); + return 0; +} + +/* Another utility function: call free() on all keys and values of HT. */ + +void +free_keys_and_values (struct hash_table *ht) +{ + hash_table_map (ht, free_keys_and_values_mapper, NULL); +} + + +/* Engine for legible and legible_large_int; add thousand separators + to numbers printed in strings. */ + +static char * +legible_1 (const char *repr) +{ + static char outbuf[48]; int i, i1, mod; - char *outptr, *inptr; + char *outptr; + const char *inptr; - /* Print the number into the buffer. */ - long_to_string (inbuf, l); /* Reset the pointers. */ outptr = outbuf; - inptr = inbuf; - /* If the number is negative, shift the pointers. */ + inptr = repr; + + /* Ignore the sign for the purpose of adding thousand + separators. */ if (*inptr == '-') { *outptr++ = '-'; @@ -935,44 +1101,753 @@ legible (long l) return outbuf; } +/* Legible -- return a static pointer to the legibly printed long. */ + +char * +legible (long l) +{ + char inbuf[24]; + /* Print the number into the buffer. */ + number_to_string (inbuf, l); + return legible_1 (inbuf); +} + +/* Write a string representation of LARGE_INT NUMBER into the provided + buffer. 
The buffer should be able to accept 24 characters, + including the terminating zero. + + It would be dangerous to use sprintf, because the code wouldn't + work on a machine with gcc-provided long long support, but without + libc support for "%lld". However, such platforms will typically + not have snprintf and will use our version, which does support + "%lld" where long longs are available. */ + +static void +large_int_to_string (char *buffer, LARGE_INT number) +{ + snprintf (buffer, 24, LARGE_INT_FMT, number); +} + +/* The same as legible(), but works on LARGE_INT. */ + +char * +legible_large_int (LARGE_INT l) +{ + char inbuf[48]; + large_int_to_string (inbuf, l); + return legible_1 (inbuf); +} + /* Count the digits in a (long) integer. */ int -numdigit (long a) +numdigit (long number) { - int res = 1; - while ((a /= 10) != 0) - ++res; - return res; + int cnt = 1; + if (number < 0) + { + number = -number; + ++cnt; + } + while ((number /= 10) > 0) + ++cnt; + return cnt; +} + +/* A half-assed implementation of INT_MAX on machines that don't + bother to define one. 
*/ +#ifndef INT_MAX +# define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1)) +#endif + +#define ONE_DIGIT(figure) *p++ = n / (figure) + '0' +#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure)) + +#define DIGITS_1(figure) ONE_DIGIT (figure) +#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10) +#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10) +#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10) +#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10) +#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10) +#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10) +#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10) +#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10) +#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10) + +/* DIGITS_<11-20> are only used on machines with 64-bit longs. */ + +#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10) +#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10) +#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10) +#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10) +#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10) +#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10) +#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10) +#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10) +#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10) + +/* Print NUMBER to BUFFER in base 10. This should be completely + equivalent to `sprintf(buffer, "%ld", number)', only much faster. 
+ + The speedup may make a difference in programs that frequently + convert numbers to strings. Some implementations of sprintf, + particularly the one in GNU libc, have been known to be extremely + slow compared to this function. + + Return the pointer to the location where the terminating zero was + printed. (Equivalent to calling buffer+strlen(buffer) after the + function is done.) + + BUFFER should be big enough to accept as many bytes as you expect + the number to take up. On machines with 64-bit longs the maximum + needed size is 24 bytes. That includes the digits needed for the + largest 64-bit number, the `-' sign in case it's negative, and the + terminating '\0'. */ + +char * +number_to_string (char *buffer, long number) +{ + char *p = buffer; + long n = number; + +#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8) + /* We are running in a strange or misconfigured environment. Let + sprintf cope with it. */ + sprintf (buffer, "%ld", n); + p += strlen (buffer); +#else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */ + + if (n < 0) + { + if (n < -INT_MAX) + { + /* We cannot print a '-' and assign -n to n because -n would + overflow. Let sprintf deal with this border case. */ + sprintf (buffer, "%ld", n); + p += strlen (buffer); + return p; + } + + *p++ = '-'; + n = -n; + } + + if (n < 10) { DIGITS_1 (1); } + else if (n < 100) { DIGITS_2 (10); } + else if (n < 1000) { DIGITS_3 (100); } + else if (n < 10000) { DIGITS_4 (1000); } + else if (n < 100000) { DIGITS_5 (10000); } + else if (n < 1000000) { DIGITS_6 (100000); } + else if (n < 10000000) { DIGITS_7 (1000000); } + else if (n < 100000000) { DIGITS_8 (10000000); } + else if (n < 1000000000) { DIGITS_9 (100000000); } +#if SIZEOF_LONG == 4 + /* ``if (1)'' serves only to preserve editor indentation. 
*/ + else if (1) { DIGITS_10 (1000000000); } +#else /* SIZEOF_LONG != 4 */ + else if (n < 10000000000L) { DIGITS_10 (1000000000L); } + else if (n < 100000000000L) { DIGITS_11 (10000000000L); } + else if (n < 1000000000000L) { DIGITS_12 (100000000000L); } + else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); } + else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); } + else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); } + else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); } + else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); } + else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); } + else { DIGITS_19 (1000000000000000000L); } +#endif /* SIZEOF_LONG != 4 */ + + *p = '\0'; +#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */ + + return p; } -/* Print NUMBER to BUFFER. The digits are first written in reverse - order (the least significant digit first), and are then reversed. */ +#undef ONE_DIGIT +#undef ONE_DIGIT_ADVANCE + +#undef DIGITS_1 +#undef DIGITS_2 +#undef DIGITS_3 +#undef DIGITS_4 +#undef DIGITS_5 +#undef DIGITS_6 +#undef DIGITS_7 +#undef DIGITS_8 +#undef DIGITS_9 +#undef DIGITS_10 +#undef DIGITS_11 +#undef DIGITS_12 +#undef DIGITS_13 +#undef DIGITS_14 +#undef DIGITS_15 +#undef DIGITS_16 +#undef DIGITS_17 +#undef DIGITS_18 +#undef DIGITS_19 + +/* Support for timers. */ + +#undef TIMER_WINDOWS +#undef TIMER_GETTIMEOFDAY +#undef TIMER_TIME + +/* Depending on the OS and availability of gettimeofday(), one and + only one of the above constants will be defined. Virtually all + modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will + use TIMER_WINDOWS. TIMER_TIME is a catch-all method for + non-Windows systems without gettimeofday. + + #### Perhaps we should also support ftime(), which exists on old + BSD 4.2-influenced systems? (It also existed under MS DOS Borland + C, if memory serves me.) 
*/ + +#ifdef WINDOWS +# define TIMER_WINDOWS +#else /* not WINDOWS */ +# ifdef HAVE_GETTIMEOFDAY +# define TIMER_GETTIMEOFDAY +# else +# define TIMER_TIME +# endif +#endif /* not WINDOWS */ + +#ifdef TIMER_GETTIMEOFDAY +typedef struct timeval wget_sys_time; +#endif + +#ifdef TIMER_TIME +typedef time_t wget_sys_time; +#endif + +#ifdef TIMER_WINDOWS +typedef ULARGE_INTEGER wget_sys_time; +#endif + +struct wget_timer { + /* The starting point in time which, subtracted from the current + time, yields elapsed time. */ + wget_sys_time start; + + /* The most recent elapsed time, calculated by wtimer_elapsed(). + Measured in milliseconds. */ + double elapsed_last; + + /* Approximately, the time elapsed between the true start of the + measurement and the time represented by START. */ + double elapsed_pre_start; +}; + +/* Allocate a timer. It is not legal to do anything with a freshly + allocated timer, except call wtimer_reset() or wtimer_delete(). */ + +struct wget_timer * +wtimer_allocate (void) +{ + struct wget_timer *wt = xnew (struct wget_timer); + return wt; +} + +/* Allocate a new timer and reset it. Return the new timer. */ + +struct wget_timer * +wtimer_new (void) +{ + struct wget_timer *wt = wtimer_allocate (); + wtimer_reset (wt); + return wt; +} + +/* Free the resources associated with the timer. Its further use is + prohibited. */ + void -long_to_string (char *buffer, long number) +wtimer_delete (struct wget_timer *wt) { - char *p; - int i, l; + xfree (wt); +} - if (number < 0) +/* Store system time to WST. */ + +static void +wtimer_sys_set (wget_sys_time *wst) +{ +#ifdef TIMER_GETTIMEOFDAY + gettimeofday (wst, NULL); +#endif + +#ifdef TIMER_TIME + time (wst); +#endif + +#ifdef TIMER_WINDOWS + /* We use GetSystemTime to get the elapsed time. MSDN warns that + system clock adjustments can skew the output of GetSystemTime + when used as a timer and gives preference to GetTickCount and + high-resolution timers. 
But GetTickCount can overflow, and hires + timers are typically used for profiling, not for regular time + measurement. Since we handle clock skew anyway, we just use + GetSystemTime. */ + FILETIME ft; + SYSTEMTIME st; + GetSystemTime (&st); + + /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy + FILETIME to ULARGE_INTEGER, and use regular 64-bit integer + arithmetic on that. */ + SystemTimeToFileTime (&st, &ft); + wst->HighPart = ft.dwHighDateTime; + wst->LowPart = ft.dwLowDateTime; +#endif +} + +/* Reset timer WT. This establishes the starting point from which + wtimer_elapsed() will return the number of elapsed + milliseconds. It is allowed to reset a previously used timer. */ + +void +wtimer_reset (struct wget_timer *wt) +{ + /* Set the start time to the current time. */ + wtimer_sys_set (&wt->start); + wt->elapsed_last = 0; + wt->elapsed_pre_start = 0; +} + +static double +wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2) +{ +#ifdef TIMER_GETTIMEOFDAY + return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000 + + (double)(wst1->tv_usec - wst2->tv_usec) / 1000); +#endif + +#ifdef TIMER_TIME + return 1000 * (*wst1 - *wst2); +#endif + +#ifdef WINDOWS + /* VC++ 6 doesn't support direct cast of uint64 to double. To work + around this, we subtract, then convert to signed, then finally to + double. */ + return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000; +#endif +} + +/* Return the number of milliseconds elapsed since the timer was last + reset. It is allowed to call this function more than once to get + increasingly higher elapsed values. These timers handle clock + skew. */ + +double +wtimer_elapsed (struct wget_timer *wt) +{ + wget_sys_time now; + double elapsed; + + wtimer_sys_set (&now); + elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start); + + /* Ideally we'd just return the difference between NOW and + wt->start. 
However, the system timer can be set back, and we + could return a value smaller than when we were last called, even + a negative value. Both of these would confuse the callers, which + expect us to return monotonically nondecreasing values. + + Therefore: if ELAPSED is smaller than its previous known value, + we reset wt->start to the current time and effectively start + measuring from this point. But since we don't want the elapsed + value to start from zero, we set elapsed_pre_start to the last + elapsed time and increment all future calculations by that + amount. */ + + if (elapsed < wt->elapsed_last) { - *buffer++ = '-'; - number = -number; + wt->start = now; + wt->elapsed_pre_start = wt->elapsed_last; + elapsed = wt->elapsed_last; } - p = buffer; - /* Print the digits to the string. */ - do + + wt->elapsed_last = elapsed; + return elapsed; +} + +/* Return the assessed granularity of the timer implementation, in + milliseconds. This is used by code that tries to substitute a + better value for timers that have returned zero. */ + +double +wtimer_granularity (void) +{ +#ifdef TIMER_GETTIMEOFDAY + /* Granularity of gettimeofday varies wildly between architectures. + However, it appears that on modern machines it tends to be better + than 1ms. Assume 100 usecs. (Perhaps the configure process + could actually measure this?) */ + return 0.1; +#endif + +#ifdef TIMER_TIME + return 1000; +#endif + +#ifdef TIMER_WINDOWS + /* According to MSDN, GetSystemTime returns a broken-down time + structure the smallest member of which is milliseconds. */ + return 1; +#endif +} + +/* This should probably be at a better place, but it doesn't really + fit into html-parse.c. */ + +/* The function returns the pointer to the malloc-ed quoted version of + string s. It will recognize and quote numeric and special graphic + entities, as per RFC1866: + + `&' -> `&amp;' + `<' -> `&lt;' + `>' -> `&gt;' + `"' -> `&quot;' + SP -> `&#32;' + + No other entities are recognized or replaced. 
*/ +char * +html_quote_string (const char *s) +{ + const char *b = s; + char *p, *res; + int i; + + /* Pass through the string, and count the new size. */ + for (i = 0; *s; s++, i++) + { + if (*s == '&') + i += 4; /* `amp;' */ + else if (*s == '<' || *s == '>') + i += 3; /* `lt;' and `gt;' */ + else if (*s == '\"') + i += 5; /* `quot;' */ + else if (*s == ' ') + i += 4; /* #32; */ + } + res = (char *)xmalloc (i + 1); + s = b; + for (p = res; *s; s++) + { + switch (*s) + { + case '&': + *p++ = '&'; + *p++ = 'a'; + *p++ = 'm'; + *p++ = 'p'; + *p++ = ';'; + break; + case '<': case '>': + *p++ = '&'; + *p++ = (*s == '<' ? 'l' : 'g'); + *p++ = 't'; + *p++ = ';'; + break; + case '\"': + *p++ = '&'; + *p++ = 'q'; + *p++ = 'u'; + *p++ = 'o'; + *p++ = 't'; + *p++ = ';'; + break; + case ' ': + *p++ = '&'; + *p++ = '#'; + *p++ = '3'; + *p++ = '2'; + *p++ = ';'; + break; + default: + *p++ = *s; + } + } + *p = '\0'; + return res; +} + +/* Determine the width of the terminal we're running on. If that's + not possible, return 0. */ + +int +determine_screen_width (void) +{ + /* If there's a way to get the terminal size using POSIX + tcgetattr(), somebody please tell me. */ +#ifndef TIOCGWINSZ + return 0; +#else /* TIOCGWINSZ */ + int fd; + struct winsize wsz; + + if (opt.lfilename != NULL) + return 0; + + fd = fileno (stderr); + if (ioctl (fd, TIOCGWINSZ, &wsz) < 0) + return 0; /* most likely ENOTTY */ + + return wsz.ws_col; +#endif /* TIOCGWINSZ */ +} + +/* Return a random number between 0 and MAX-1, inclusive. + + If MAX is greater than the value of RAND_MAX+1 on the system, the + returned value will be in the range [0, RAND_MAX]. This may be + fixed in a future release. + + The random number generator is seeded automatically the first time + it is called. + + This uses rand() for portability. It has been suggested that + random() offers better randomness, but this is not required for + Wget, so I chose to go for simplicity and use rand + unconditionally. 
+ + DO NOT use this for cryptographic purposes. It is only meant to be + used in situations where quality of the random numbers returned + doesn't really matter. */ + +int +random_number (int max) +{ + static int seeded; + double bounded; + int rnd; + + if (!seeded) + { + srand (time (NULL)); + seeded = 1; + } + rnd = rand (); + + /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1, + and enforce that assumption by masking other bits. */ +#ifndef RAND_MAX +# define RAND_MAX 32767 + rnd &= RAND_MAX; +#endif + + /* This is equivalent to rand() % max, but uses the high-order bits + for better randomness on architecture where rand() is implemented + using a simple congruential generator. */ + + bounded = (double)max * rnd / (RAND_MAX + 1.0); + return (int)bounded; +} + +/* Return a random uniformly distributed floating point number in the + [0, 1) range. The precision of returned numbers is 9 digits. + + Modify this to use erand48() where available! */ + +double +random_float (void) +{ + /* We can't rely on any specific value of RAND_MAX, but I'm pretty + sure it's greater than 1000. */ + int rnd1 = random_number (1000); + int rnd2 = random_number (1000); + int rnd3 = random_number (1000); + return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0; +} + +#if 0 +/* A debugging function for checking whether an MD5 library works. */ + +#include "gen-md5.h" + +char * +debug_test_md5 (char *buf) +{ + unsigned char raw[16]; + static char res[33]; + unsigned char *p1; + char *p2; + int cnt; + ALLOCA_MD5_CONTEXT (ctx); + + gen_md5_init (ctx); + gen_md5_update ((unsigned char *)buf, strlen (buf), ctx); + gen_md5_finish (ctx, raw); + + p1 = raw; + p2 = res; + cnt = 16; + while (cnt--) { - *p++ = number % 10 + '0'; - number /= 10; + *p2++ = XNUM_TO_digit (*p1 >> 4); + *p2++ = XNUM_TO_digit (*p1 & 0xf); + ++p1; } - while (number); - /* And reverse them. 
*/ - l = p - buffer - 1; - for (i = l/2; i >= 0; i--) + *p2 = '\0'; + + return res; +} +#endif + +/* Implementation of run_with_timeout, a generic timeout-forcing + routine for systems with Unix-like signal handling. */ + +#ifdef USE_SIGNAL_TIMEOUT +# ifdef HAVE_SIGSETJMP +# define SETJMP(env) sigsetjmp (env, 1) + +static sigjmp_buf run_with_timeout_env; + +static RETSIGTYPE +abort_run_with_timeout (int sig) +{ + assert (sig == SIGALRM); + siglongjmp (run_with_timeout_env, -1); +} +# else /* not HAVE_SIGSETJMP */ +# define SETJMP(env) setjmp (env) + +static jmp_buf run_with_timeout_env; + +static RETSIGTYPE +abort_run_with_timeout (int sig) +{ + assert (sig == SIGALRM); + /* We don't have siglongjmp to preserve the set of blocked signals; + if we longjumped out of the handler at this point, SIGALRM would + remain blocked. We must unblock it manually. */ + int mask = siggetmask (); + mask &= ~sigmask (SIGALRM); + sigsetmask (mask); + + /* Now it's safe to longjump. */ + longjmp (run_with_timeout_env, -1); +} +# endif /* not HAVE_SIGSETJMP */ + +/* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses + setitimer where available, alarm otherwise. + + TIMEOUT should be non-zero. If the timeout value is so small that + it would be rounded to zero, it is rounded to the least legal value + instead (1us for setitimer, 1s for alarm). That ensures that + SIGALRM will be delivered in all cases. */ + +static void +alarm_set (double timeout) +{ +#ifdef ITIMER_REAL + /* Use the modern itimer interface. */ + struct itimerval itv; + xzero (itv); + itv.it_value.tv_sec = (long) timeout; + itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout); + if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0) + /* Ensure that we wait for at least the minimum interval. + Specifying zero would mean "wait forever". */ + itv.it_value.tv_usec = 1; + setitimer (ITIMER_REAL, &itv, NULL); +#else /* not ITIMER_REAL */ + /* Use the old alarm() interface. 
*/ + int secs = (int) timeout; + if (secs == 0) + /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is + because alarm(0) means "never deliver the alarm", i.e. "wait + forever", which is not what someone who specifies a 0.5s + timeout would expect. */ + secs = 1; + alarm (secs); +#endif /* not ITIMER_REAL */ +} + +/* Cancel the alarm set with alarm_set. */ + +static void +alarm_cancel (void) +{ +#ifdef ITIMER_REAL + struct itimerval disable; + xzero (disable); + setitimer (ITIMER_REAL, &disable, NULL); +#else /* not ITIMER_REAL */ + alarm (0); +#endif /* not ITIMER_REAL */ +} + +/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT + seconds. Returns non-zero if the function was interrupted with a + timeout, zero otherwise. + + This works by setting up SIGALRM to be delivered in TIMEOUT seconds + using setitimer() or alarm(). The timeout is enforced by + longjumping out of the SIGALRM handler. This has several + advantages compared to the traditional approach of relying on + signals causing system calls to exit with EINTR: + + * The callback function is *forcibly* interrupted after the + timeout expires, (almost) regardless of what it was doing and + whether it was in a syscall. For example, a calculation that + takes a long time is interrupted as reliably as an IO + operation. + + * It works with both SYSV and BSD signals because it doesn't + depend on the default setting of SA_RESTART. + + * It doesn't require special handler setup beyond a simple call to + signal(). (It does use sigsetjmp/siglongjmp, but they're + optional.) + + The only downside is that, if FUN allocates internal resources that + are normally freed prior to exit from the function, they will be + lost in case of timeout. 
*/ + +int +run_with_timeout (double timeout, void (*fun) (void *), void *arg) +{ + int saved_errno; + + if (timeout == 0) + { + fun (arg); + return 0; + } + + signal (SIGALRM, abort_run_with_timeout); + if (SETJMP (run_with_timeout_env) != 0) { - char c = buffer[i]; - buffer[i] = buffer[l - i]; - buffer[l - i] = c; + /* Longjumped out of FUN with a timeout. */ + signal (SIGALRM, SIG_DFL); + return 1; } - buffer[l + 1] = '\0'; + alarm_set (timeout); + fun (arg); + + /* Preserve errno in case alarm() or signal() modifies it. */ + saved_errno = errno; + alarm_cancel (); + signal (SIGALRM, SIG_DFL); + errno = saved_errno; + + return 0; +} + +#else /* not USE_SIGNAL_TIMEOUT */ + +#ifndef WINDOWS +/* A stub version of run_with_timeout that just calls FUN(ARG). Don't + define it under Windows, because Windows has its own version of + run_with_timeout that uses threads. */ + +int +run_with_timeout (double timeout, void (*fun) (void *), void *arg) +{ + fun (arg); + return 0; } +#endif /* not WINDOWS */ +#endif /* not USE_SIGNAL_TIMEOUT */