1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #else /* not HAVE_STRING_H */
29 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
54 /* For TIOCGWINSZ and friends: */
55 #ifdef HAVE_SYS_IOCTL_H
56 # include <sys/ioctl.h>
62 /* Needed for run_with_timeout. */
63 #undef USE_SIGNAL_TIMEOUT
70 /* If sigsetjmp is a macro, configure won't pick it up. */
72 # define HAVE_SIGSETJMP
75 # ifdef HAVE_SIGSETJMP
76 # define USE_SIGNAL_TIMEOUT
79 # define USE_SIGNAL_TIMEOUT
92 /* This section implements several wrappers around the basic
93 allocation routines. This is done for two reasons: first, so that
94 the callers of these functions need not consistently check for
95 errors. If there is not enough virtual memory for running Wget,
96 something is seriously wrong, and Wget exits with an appropriate
99 The second reason why these are useful is that, if DEBUG_MALLOC is
100 defined, they also provide a handy (if crude) malloc debugging
101 interface that checks memory leaks. */
103 /* Croak the fatal memory error and bail out with non-zero exit
106 memfatal (const char *what)
108 /* Make sure we don't try to store part of the log line, and thus
110 log_set_save_context (0);
111 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
115 /* These functions end with _real because they need to be
116 distinguished from the debugging functions, and from the macros.
119 If memory debugging is not turned on, wget.h defines these:
121 #define xmalloc xmalloc_real
122 #define xrealloc xrealloc_real
123 #define xstrdup xstrdup_real
126 In case of memory debugging, the definitions are a bit more
127 complex, because we want to provide more information, *and* we want
128 to call the debugging code. (The former is the reason why xmalloc
129 and friends need to be macros in the first place.) Then it looks
132 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
133 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
134 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
135 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
137 Each of the *_debug function does its magic and calls the real one. */
140 # define STATIC_IF_DEBUG static
142 # define STATIC_IF_DEBUG
145 STATIC_IF_DEBUG void *
146 xmalloc_real (size_t size)
148 void *ptr = malloc (size);
154 STATIC_IF_DEBUG void *
155 xrealloc_real (void *ptr, size_t newsize)
159 /* Not all Un*xes have the feature of realloc() that calling it with
160 a NULL-pointer is the same as malloc(), but it is easy to
163 newptr = realloc (ptr, newsize);
165 newptr = malloc (newsize);
167 memfatal ("realloc");
171 STATIC_IF_DEBUG char *
172 xstrdup_real (const char *s)
178 copy = malloc (l + 1);
181 memcpy (copy, s, l + 1);
182 #else /* HAVE_STRDUP */
186 #endif /* HAVE_STRDUP */
193 /* Crude home-grown routines for debugging some malloc-related
196 * Counting the number of malloc and free invocations, and reporting
197 the "balance", i.e. how many more times malloc was called than
198 free was.
200 * Making malloc store its entry into a simple array and free remove
201 stuff from that array. At the end, print the pointers which have
202 not been freed, along with the source file and the line number.
203 This also has the side-effect of detecting freeing memory that
206 Note that this kind of memory leak checking strongly depends on
207 every malloc() being followed by a free(), even if the program is
208 about to finish. Wget is careful to free the data structure it
209 allocated in init.c. */
211 static int malloc_count, free_count;
217 } malloc_debug[100000];
219 /* Both register_ptr and unregister_ptr take O(n) operations to run,
220 which can be a real problem. It would be nice to use a hash table
221 for malloc_debug, but the functions in hash.c are not suitable
222 because they can call malloc() themselves. Maybe it would work if
223 the hash table were preallocated to a huge size, and if we set the
224 rehash threshold to 1.0. */
226 /* Register PTR in malloc_debug. Abort if this is not possible
227 (presumably due to the number of current allocations exceeding the
228 size of malloc_debug.) */
231 register_ptr (void *ptr, const char *file, int line)
234 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
235 if (malloc_debug[i].ptr == NULL)
237 malloc_debug[i].ptr = ptr;
238 malloc_debug[i].file = file;
239 malloc_debug[i].line = line;
245 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
246 malloc_debug. (This catches calling free() with a bogus pointer.) */
249 unregister_ptr (void *ptr)
252 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
253 if (malloc_debug[i].ptr == ptr)
255 malloc_debug[i].ptr = NULL;
261 /* Print the malloc debug stats that can be gathered from the above
262 information. Currently this is the count of mallocs, frees, the
263 difference between the two, and the dump of the contents of
264 malloc_debug. The last part shows the memory leaks. */
267 print_malloc_debug_stats (void)
270 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
271 malloc_count, free_count, malloc_count - free_count);
272 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
273 if (malloc_debug[i].ptr != NULL)
274 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
275 malloc_debug[i].file, malloc_debug[i].line);
279 xmalloc_debug (size_t size, const char *source_file, int source_line)
281 void *ptr = xmalloc_real (size);
283 register_ptr (ptr, source_file, source_line);
288 xfree_debug (void *ptr, const char *source_file, int source_line)
290 assert (ptr != NULL);
292 unregister_ptr (ptr);
297 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
299 void *newptr = xrealloc_real (ptr, newsize);
303 register_ptr (newptr, source_file, source_line);
305 else if (newptr != ptr)
307 unregister_ptr (ptr);
308 register_ptr (newptr, source_file, source_line);
314 xstrdup_debug (const char *s, const char *source_file, int source_line)
316 char *copy = xstrdup_real (s);
318 register_ptr (copy, source_file, source_line);
322 #endif /* DEBUG_MALLOC */
324 /* Utility function: like xstrdup(), but also lowercases S. */
327 xstrdup_lower (const char *s)
329 char *copy = xstrdup (s);
336 /* Return a count of how many times CHR occurs in STRING. */
339 count_char (const char *string, char chr)
343 for (p = string; *p; p++)
349 /* Copy the string formed by two pointers (one on the beginning, other
350 on the char after the last char) to a new, malloc-ed location.
353 strdupdelim (const char *beg, const char *end)
355 char *res = (char *)xmalloc (end - beg + 1);
356 memcpy (res, beg, end - beg);
357 res[end - beg] = '\0';
361 /* Parse a string containing comma-separated elements, and return a
362 vector of char pointers with the elements. Spaces following the
363 commas are ignored. */
365 sepstring (const char *s)
379 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
380 res[i] = strdupdelim (p, s);
383 /* Skip the blanks following the ','. */
391 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
392 res[i] = strdupdelim (p, s);
397 /* Return pointer to a static char[] buffer in which zero-terminated
398 string-representation of TM (in form hh:mm:ss) is printed.
400 If TM is non-NULL, the current time-in-seconds will be stored
403 (#### This is misleading: one would expect TM would be used instead
404 of the current time in that case. This design was probably
405 influenced by the design of time(2), and should be changed at some
406 point. No callers use non-NULL TM anyway.) */
409 time_str (time_t *tm)
411 static char output[15];
413 time_t secs = time (tm);
417 /* In case of error, return the empty string. Maybe we should
418 just abort if this happens? */
422 ptm = localtime (&secs);
423 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
427 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
430 datetime_str (time_t *tm)
432 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
434 time_t secs = time (tm);
438 /* In case of error, return the empty string. Maybe we should
439 just abort if this happens? */
443 ptm = localtime (&secs);
444 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
445 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
446 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
450 /* The Windows versions of the following two functions are defined in
455 fork_to_background (void)
458 /* Whether we arrange our own version of opt.lfilename here. */
463 opt.lfilename = unique_name (DEFAULT_LOGFILE);
475 /* parent, no error */
476 printf (_("Continuing in background, pid %d.\n"), (int)pid);
478 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
479 exit (0); /* #### should we use _exit()? */
482 /* child: give up the privileges and keep running. */
484 freopen ("/dev/null", "r", stdin);
485 freopen ("/dev/null", "w", stdout);
486 freopen ("/dev/null", "w", stderr);
488 #endif /* not WINDOWS */
490 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
491 specified with TM. */
493 touch (const char *file, time_t tm)
495 #ifdef HAVE_STRUCT_UTIMBUF
496 struct utimbuf times;
497 times.actime = times.modtime = tm;
500 times[0] = times[1] = tm;
503 if (utime (file, &times) == -1)
504 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
507 /* Checks if FILE is a symbolic link, and removes it if it is. Does
508 nothing under MS-Windows. */
510 remove_link (const char *file)
515 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
517 DEBUGP (("Unlinking %s (symlink).\n", file));
520 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
521 file, strerror (errno));
526 /* Does FILENAME exist? This is quite a lousy implementation, since
527 it supplies no error codes -- only a yes-or-no answer. Thus it
528 will return that a file does not exist if, e.g., the directory is
529 unreadable. I don't mind it too much currently, though. The
530 proper way should, of course, be to have a third, error state,
531 other than true/false, but that would introduce uncalled-for
532 additional complexity to the callers. */
534 file_exists_p (const char *filename)
537 return access (filename, F_OK) >= 0;
540 return stat (filename, &buf) >= 0;
544 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
545 Returns 0 on error. */
547 file_non_directory_p (const char *path)
550 /* Use lstat() rather than stat() so that symbolic links pointing to
551 directories can be identified correctly. */
552 if (lstat (path, &buf) != 0)
554 return S_ISDIR (buf.st_mode) ? 0 : 1;
557 /* Return the size of file named by FILENAME, or -1 if it cannot be
558 opened or seeked into. */
560 file_size (const char *filename)
563 /* We use fseek rather than stat to determine the file size because
564 that way we can also verify whether the file is readable.
565 Inspired by the POST patch by Arnaud Wylie. */
566 FILE *fp = fopen (filename, "rb");
567 fseek (fp, 0, SEEK_END);
573 /* Return a unique filename, given a prefix and count */
575 unique_name_1 (const char *fileprefix, int count)
581 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
582 sprintf (filename, "%s.%d", fileprefix, count);
585 filename = xstrdup (fileprefix);
587 if (!file_exists_p (filename))
596 /* Return a unique file name, based on PREFIX. */
598 unique_name (const char *prefix)
604 file = unique_name_1 (prefix, count++);
608 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
609 are missing, create them first. In case any mkdir() call fails,
610 return its error status. Returns 0 on successful completion.
612 The behaviour of this function should be identical to the behaviour
613 of `mkdir -p' on systems where mkdir supports the `-p' option. */
615 make_directory (const char *directory)
622 /* Make a copy of dir, to be able to write to it. Otherwise, the
623 function is unsafe if called with a read-only char *argument. */
624 STRDUP_ALLOCA (dir, directory);
626 /* If the first character of dir is '/', skip it (and thus enable
627 creation of absolute-pathname directories). */
628 for (i = (*dir == '/'); 1; ++i)
630 for (; dir[i] && dir[i] != '/'; i++)
635 /* Check whether the directory already exists. Allow creation of
636 intermediate directories to fail, as the initial path components
637 are not necessarily directories! */
638 if (!file_exists_p (dir))
639 ret = mkdir (dir, 0777);
650 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
651 should be a file name.
653 file_merge("/foo/bar", "baz") => "/foo/baz"
654 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
655 file_merge("foo", "bar") => "bar"
657 In other words, it's a simpler and gentler version of uri_merge_1. */
660 file_merge (const char *base, const char *file)
663 const char *cut = (const char *)strrchr (base, '/');
666 return xstrdup (file);
668 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
669 memcpy (result, base, cut - base);
670 result[cut - base] = '/';
671 strcpy (result + (cut - base) + 1, file);
676 static int in_acclist PARAMS ((const char *const *, const char *, int));
678 /* Determine whether a file is acceptable to be followed, according to
679 lists of patterns to accept/reject. */
681 acceptable (const char *s)
685 while (l && s[l] != '/')
692 return (in_acclist ((const char *const *)opt.accepts, s, 1)
693 && !in_acclist ((const char *const *)opt.rejects, s, 1));
695 return in_acclist ((const char *const *)opt.accepts, s, 1);
697 else if (opt.rejects)
698 return !in_acclist ((const char *const *)opt.rejects, s, 1);
702 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
703 `/something', frontcmp() will return 1 only if S2 begins with
704 `/something'. Otherwise, 0 is returned. */
706 frontcmp (const char *s1, const char *s2)
708 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
712 /* Iterate through STRLIST, and return the first element that matches
713 S, through wildcards or front comparison (as appropriate). */
715 proclist (char **strlist, const char *s, enum accd flags)
719 for (x = strlist; *x; x++)
720 if (has_wildcards_p (*x))
722 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
727 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
734 /* Returns whether DIRECTORY is acceptable for download, wrt the
735 include/exclude lists.
737 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
738 and absolute paths may be freely intermixed. */
740 accdir (const char *directory, enum accd flags)
742 /* Remove starting '/'. */
743 if (flags & ALLABS && *directory == '/')
747 if (!proclist (opt.includes, directory, flags))
752 if (proclist (opt.excludes, directory, flags))
758 /* Return non-zero if STRING ends with TAIL. For instance:
760 match_tail ("abc", "bc", 0) -> 1
761 match_tail ("abc", "ab", 0) -> 0
762 match_tail ("abc", "abc", 0) -> 1
764 If FOLD_CASE_P is non-zero, the comparison will be
768 match_tail (const char *string, const char *tail, int fold_case_p)
772 /* We want this to be fast, so we code two loops, one with
773 case-folding, one without. */
777 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
778 if (string[i] != tail[j])
783 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
784 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
788 /* If the tail was exhausted, the match was successful. */
795 /* Checks whether string S matches each element of ACCEPTS. A list
796 element is matched either with fnmatch() or match_tail(),
797 according to whether the element contains wildcards or not.
799 If BACKWARD is 0, don't do backward comparison -- just compare
802 in_acclist (const char *const *accepts, const char *s, int backward)
804 for (; *accepts; accepts++)
806 if (has_wildcards_p (*accepts))
808 /* fnmatch returns 0 if the pattern *does* match the
810 if (fnmatch (*accepts, s, 0) == 0)
817 if (match_tail (s, *accepts, 0))
822 if (!strcmp (s, *accepts))
830 /* Return the location of STR's suffix (file extension). Examples:
831 suffix ("foo.bar") -> "bar"
832 suffix ("foo.bar.baz") -> "baz"
833 suffix ("/foo/bar") -> NULL
834 suffix ("/foo.bar/baz") -> NULL */
836 suffix (const char *str)
840 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
844 return (char *)str + i;
849 /* Return non-zero if FNAME ends with a typical HTML suffix. The
850 following (case-insensitive) suffixes are presumed to be HTML files:
854 ?html (`?' matches one character)
856 #### CAVEAT. This is not necessarily a good indication that FNAME
857 refers to a file that contains HTML! */
859 has_html_suffix_p (const char *fname)
863 if ((suf = suffix (fname)) == NULL)
865 if (!strcasecmp (suf, "html"))
867 if (!strcasecmp (suf, "htm"))
869 if (suf[0] && !strcasecmp (suf + 1, "html"))
874 /* Read a line from FP and return the pointer to freshly allocated
875 storage. The storage space is obtained through malloc() and
876 should be freed with free() when it is no longer needed.
878 The length of the line is not limited, except by available memory.
879 The newline character at the end of line is retained. The line is
880 terminated with a zero character.
882 After end-of-file is encountered without anything being read, NULL
883 is returned. NULL is also returned on error. To distinguish
884 between these two cases, use the stdio function ferror(). */
887 read_whole_line (FILE *fp)
891 char *line = (char *)xmalloc (bufsize);
893 while (fgets (line + length, bufsize - length, fp))
895 length += strlen (line + length);
897 /* Possible for example when reading from a binary file where
898 a line begins with \0. */
901 if (line[length - 1] == '\n')
904 /* fgets() guarantees to read the whole line, or to use up the
905 space we've given it. We can double the buffer
908 line = xrealloc (line, bufsize);
910 if (length == 0 || ferror (fp))
915 if (length + 1 < bufsize)
916 /* Relieve the memory from our exponential greediness. We say
917 `length + 1' because the terminating \0 is not included in
918 LENGTH. We don't need to zero-terminate the string ourselves,
919 though, because fgets() does that. */
920 line = xrealloc (line, length + 1);
924 /* Read FILE into memory. A pointer to `struct file_memory' is
925 returned; use struct element `content' to access file contents, and
926 the element `length' to know the file length. `content' is *not*
927 zero-terminated, and you should *not* read or write beyond the [0,
928 length) range of characters.
930 After you are done with the file contents, call read_file_free to
933 Depending on the operating system and the type of file that is
934 being read, read_file() either mmap's the file into memory, or
935 reads the file into the core using read().
937 If file is named "-", fileno(stdin) is used for reading instead.
938 If you want to read from a real file named "-", use "./-" instead. */
941 read_file (const char *file)
944 struct file_memory *fm;
946 int inhibit_close = 0;
948 /* Some magic in the finest tradition of Perl and its kin: if FILE
949 is "-", just use stdin. */
954 /* Note that we don't inhibit mmap() in this case. If stdin is
955 redirected from a regular file, mmap() will still work. */
958 fd = open (file, O_RDONLY);
961 fm = xmalloc (sizeof (struct file_memory));
966 if (fstat (fd, &buf) < 0)
968 fm->length = buf.st_size;
969 /* NOTE: As far as I know, the callers of this function never
970 modify the file text. Relying on this would enable us to
971 specify PROT_READ and MAP_SHARED for a marginal gain in
972 efficiency, but at some cost to generality. */
973 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
975 if (fm->content == (char *)MAP_FAILED)
985 /* The most common reason why mmap() fails is that FD does not point
986 to a plain file. However, it's also possible that mmap() doesn't
987 work for a particular type of file. Therefore, whenever mmap()
988 fails, we just fall back to the regular method. */
989 #endif /* HAVE_MMAP */
992 size = 512; /* number of bytes fm->content can
993 hold at any given time. */
994 fm->content = xmalloc (size);
998 if (fm->length > size / 2)
1000 /* #### I'm not sure whether the whole exponential-growth
1001 thing makes sense with kernel read. On Linux at least,
1002 read() refuses to read more than 4K from a file at a
1003 single chunk anyway. But other Unixes might optimize it
1004 better, and it doesn't *hurt* anything, so I'm leaving
1007 /* Normally, we grow SIZE exponentially to make the number
1008 of calls to read() and realloc() logarithmic in relation
1009 to file size. However, read() can read an amount of data
1010 smaller than requested, and it would be unreasonable to
1011 double SIZE every time *something* was read. Therefore,
1012 we double SIZE only when the length exceeds half of the
1013 entire allocated size. */
1015 fm->content = xrealloc (fm->content, size);
1017 nread = read (fd, fm->content + fm->length, size - fm->length);
1019 /* Successful read. */
1020 fm->length += nread;
1030 if (size > fm->length && fm->length != 0)
1031 /* Due to exponential growth of fm->content, the allocated region
1032 might be much larger than what is actually needed. */
1033 fm->content = xrealloc (fm->content, fm->length);
1040 xfree (fm->content);
1045 /* Release the resources held by FM. Specifically, this calls
1046 munmap() or xfree() on fm->content, depending whether mmap or
1047 malloc/read were used to read in the file. It also frees the
1048 memory needed to hold the FM structure itself. */
1051 read_file_free (struct file_memory *fm)
1056 munmap (fm->content, fm->length);
1061 xfree (fm->content);
1066 /* Free the pointers in a NULL-terminated vector of pointers, then
1067 free the pointer itself. */
1069 free_vec (char **vec)
1080 /* Append vector V2 to vector V1. The function frees V2 and
1081 reallocates V1 (thus you may not use the contents of either
1082 pointer after the call). If V1 is NULL, V2 is returned. */
1084 merge_vecs (char **v1, char **v2)
1094 /* To avoid j == 0 */
1099 for (i = 0; v1[i]; i++);
1101 for (j = 0; v2[j]; j++);
1102 /* Reallocate v1. */
1103 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1104 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1109 /* A set of simple-minded routines to store strings in a linked list.
1110 This used to also be used for searching, but now we have hash
1113 /* It's a shame that these simple things like linked lists and hash
1114 tables (see hash.c) need to be implemented over and over again. It
1115 would be nice to be able to use the routines from glib -- see
1116 www.gtk.org for details. However, that would make Wget depend on
1117 glib, and I want to avoid dependencies to external libraries for
1118 reasons of convenience and portability (I suspect Wget is more
1119 portable than anything ever written for Gnome). */
1121 /* Append an element to the list. If the list has a huge number of
1122 elements, this can get slow because it has to find the list's
1123 ending. If you think you have to call slist_append in a loop,
1124 think about calling slist_prepend() followed by slist_nreverse(). */
1127 slist_append (slist *l, const char *s)
1129 slist *newel = (slist *)xmalloc (sizeof (slist));
1132 newel->string = xstrdup (s);
1137 /* Find the last element. */
1144 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1147 slist_prepend (slist *l, const char *s)
1149 slist *newel = (slist *)xmalloc (sizeof (slist));
1150 newel->string = xstrdup (s);
1155 /* Destructively reverse L. */
1158 slist_nreverse (slist *l)
1163 slist *next = l->next;
1171 /* Is there a specific entry in the list? */
1173 slist_contains (slist *l, const char *s)
1175 for (; l; l = l->next)
1176 if (!strcmp (l->string, s))
1181 /* Free the whole slist. */
1183 slist_free (slist *l)
1194 /* Sometimes it's useful to create "sets" of strings, i.e. special
1195 hash tables where you want to store strings as keys and merely
1196 query for their existence. Here is a set of utility routines that
1197 makes that transparent. */
1200 string_set_add (struct hash_table *ht, const char *s)
1202 /* First check whether the set element already exists. If it does,
1203 do nothing so that we don't have to free() the old element and
1204 then strdup() a new one. */
1205 if (hash_table_contains (ht, s))
1208 /* We use "1" as value. It provides us a useful and clear arbitrary
1209 value, and it consumes no memory -- the pointers to the same
1210 string "1" will be shared by all the key-value pairs in all `set'
1212 hash_table_put (ht, xstrdup (s), "1");
1215 /* Synonym for hash_table_contains... */
1218 string_set_contains (struct hash_table *ht, const char *s)
1220 return hash_table_contains (ht, s);
1224 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1231 string_set_free (struct hash_table *ht)
1233 hash_table_map (ht, string_set_free_mapper, NULL);
1234 hash_table_destroy (ht);
1238 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1245 /* Another utility function: call free() on all keys and values of HT. */
1248 free_keys_and_values (struct hash_table *ht)
1250 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1254 /* Engine for legible and legible_very_long; this function works on
1258 legible_1 (const char *repr)
1260 static char outbuf[128];
1265 /* Reset the pointers. */
1268 /* If the number is negative, shift the pointers. */
1274 /* How many digits before the first separator? */
1275 mod = strlen (inptr) % 3;
1277 for (i = 0; i < mod; i++)
1278 *outptr++ = inptr[i];
1279 /* Now insert the rest of them, putting separator before every
1281 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1283 if (i % 3 == 0 && i1 != 0)
1285 *outptr++ = inptr[i1];
1287 /* Zero-terminate the string. */
1292 /* Legible -- return a static pointer to the legibly printed long. */
1297 /* Print the number into the buffer. */
1298 number_to_string (inbuf, l);
1299 return legible_1 (inbuf);
1302 /* Write a string representation of NUMBER into the provided buffer.
1303 We cannot use sprintf() because we cannot be sure whether the
1304 platform supports printing of what we chose for VERY_LONG_TYPE.
1306 Example: Gcc supports `long long' under many platforms, but on many
1307 of those the native libc knows nothing of it and therefore cannot
1310 How long BUFFER needs to be depends on the platform and the content
1311 of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24
1312 bytes are sufficient. Using more might be a good idea.
1314 This function does not go through the hoops that number_to_string
1315 goes through because it doesn't aspire to be fast. (It's called perhaps
1316 once in a Wget run.) */
1319 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1324 /* Print the number backwards... */
1327 buffer[i++] = '0' + number % 10;
1332 /* ...and reverse the order of the digits. */
1333 for (j = 0; j < i / 2; j++)
1336 buffer[j] = buffer[i - 1 - j];
1337 buffer[i - 1 - j] = c;
1342 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1344 legible_very_long (VERY_LONG_TYPE l)
1347 /* Print the number into the buffer. */
1348 very_long_to_string (inbuf, l);
1349 return legible_1 (inbuf);
1352 /* Count the digits in a (long) integer. */
1354 numdigit (long number)
1362 while ((number /= 10) > 0)
1367 /* A half-assed implementation of INT_MAX on machines that don't
1368 bother to define one. */
1370 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
1373 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1374 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1376 #define DIGITS_1(figure) ONE_DIGIT (figure)
1377 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1378 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1379 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1380 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1381 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1382 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1383 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1384 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1385 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1387 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1389 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1390 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1391 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1392 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1393 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1394 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1395 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1396 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1397 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1399 /* Print NUMBER to BUFFER in base 10. This should be completely
1400 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1402 The speedup may make a difference in programs that frequently
1403 convert numbers to strings. Some implementations of sprintf,
1404 particularly the one in GNU libc, have been known to be extremely
1405 slow compared to this function.
1407 Return the pointer to the location where the terminating zero was
1408 printed. (Equivalent to calling buffer+strlen(buffer) after the
1411 BUFFER should be big enough to accept as many bytes as you expect
1412 the number to take up. On machines with 64-bit longs the maximum
1413 needed size is 24 bytes. That includes the digits needed for the
1414 largest 64-bit number, the `-' sign in case it's negative, and the
1415 terminating '\0'. */
1418 number_to_string (char *buffer, long number)
1423 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1424 /* We are running in a strange or misconfigured environment. Let
1425 sprintf cope with it. */
1426 sprintf (buffer, "%ld", n);
1427 p += strlen (buffer);
1428 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1434 /* We cannot print a '-' and assign -n to n because -n would
1435 overflow. Let sprintf deal with this border case. */
1436 sprintf (buffer, "%ld", n);
1437 p += strlen (buffer);
1445 if (n < 10) { DIGITS_1 (1); }
1446 else if (n < 100) { DIGITS_2 (10); }
1447 else if (n < 1000) { DIGITS_3 (100); }
1448 else if (n < 10000) { DIGITS_4 (1000); }
1449 else if (n < 100000) { DIGITS_5 (10000); }
1450 else if (n < 1000000) { DIGITS_6 (100000); }
1451 else if (n < 10000000) { DIGITS_7 (1000000); }
1452 else if (n < 100000000) { DIGITS_8 (10000000); }
1453 else if (n < 1000000000) { DIGITS_9 (100000000); }
1454 #if SIZEOF_LONG == 4
1455 /* ``if (1)'' serves only to preserve editor indentation. */
1456 else if (1) { DIGITS_10 (1000000000); }
1457 #else /* SIZEOF_LONG != 4 */
1458 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1459 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1460 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1461 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1462 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1463 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1464 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1465 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1466 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1467 else { DIGITS_19 (1000000000000000000L); }
1468 #endif /* SIZEOF_LONG != 4 */
1471 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1477 #undef ONE_DIGIT_ADVANCE
1499 /* Support for timers. */
1501 #undef TIMER_WINDOWS
1502 #undef TIMER_GETTIMEOFDAY
1505 /* Depending on the OS and availability of gettimeofday(), one and
1506 only one of the above constants will be defined. Virtually all
1507 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1508 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1509 non-Windows systems without gettimeofday.
1511 #### Perhaps we should also support ftime(), which exists on old
1512 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1513 C, if memory serves me.) */
1516 # define TIMER_WINDOWS
1517 #else /* not WINDOWS */
1518 # ifdef HAVE_GETTIMEOFDAY
1519 # define TIMER_GETTIMEOFDAY
1523 #endif /* not WINDOWS */
1526 #ifdef TIMER_GETTIMEOFDAY
1535 #ifdef TIMER_WINDOWS
1536 ULARGE_INTEGER wintime;
1540 /* Allocate a timer. It is not legal to do anything with a freshly
1541 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1544 wtimer_allocate (void)
1546 struct wget_timer *wt =
1547 (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1551 /* Allocate a new timer and reset it. Return the new timer. */
1556 struct wget_timer *wt = wtimer_allocate ();
1561 /* Free the resources associated with the timer. Its further use is
1565 wtimer_delete (struct wget_timer *wt)
1570 /* Reset timer WT. This establishes the starting point from which
1571 wtimer_elapsed() will return the number of elapsed
1572 milliseconds. It is allowed to reset a previously used timer. */
1575 wtimer_reset (struct wget_timer *wt)
1577 #ifdef TIMER_GETTIMEOFDAY
/* Record the current time of day, as seconds plus microseconds, as
   the timer's starting point. */
1579 gettimeofday (&t, NULL);
1580 wt->secs = t.tv_sec;
1581 wt->usecs = t.tv_usec;
/* time() variant: only the seconds field is recorded. */
1585 wt->secs = time (NULL);
1588 #ifdef TIMER_WINDOWS
/* Take the current system time, convert it to a FILETIME, and keep
   both of its halves in wt->wintime as the starting point. */
1591 GetSystemTime (&st);
1592 SystemTimeToFileTime (&st, &ft);
1593 wt->wintime.HighPart = ft.dwHighDateTime;
1594 wt->wintime.LowPart = ft.dwLowDateTime;
1598 /* Return the number of milliseconds elapsed since the timer was last
1599 reset. It is allowed to call this function more than once to get
1600 increasingly higher elapsed values. */
1603 wtimer_elapsed (struct wget_timer *wt)
1605 #ifdef TIMER_GETTIMEOFDAY
1607 gettimeofday (&t, NULL);
/* Scale the difference in whole seconds up to milliseconds and the
   difference in microseconds down to milliseconds.  The microsecond
   difference may be negative; the seconds term compensates. */
1608 return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
1612 time_t now = time (NULL);
/* Only a seconds count was stored by wtimer_reset() in this variant;
   time() normally has one-second resolution, so this result advances
   in steps of 1000. */
1613 return 1000 * (now - wt->secs);
1620 GetSystemTime (&st);
1621 SystemTimeToFileTime (&st, &ft);
1622 uli.HighPart = ft.dwHighDateTime;
1623 uli.LowPart = ft.dwLowDateTime;
/* FILETIME counts 100-nanosecond intervals (per the Win32
   documentation), so dividing the difference by 10000 yields the
   milliseconds this function returns. */
1624 return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
1628 /* Return the assessed granularity of the timer implementation. This
1629 is important for certain code that tries to deal with "zero" time
1633 wtimer_granularity (void)
1635 #ifdef TIMER_GETTIMEOFDAY
1636 /* Granularity of gettimeofday is hugely architecture-dependent.
1637 However, it appears that on modern machines it is better than
1643 /* This is clear. */
1647 #ifdef TIMER_WINDOWS
1653 /* This should probably be at a better place, but it doesn't really
1654 fit into html-parse.c. */
1656 /* The function returns the pointer to the malloc-ed quoted version of
1657 string s. It will recognize and quote numeric and special graphic
1658 entities, as per RFC1866:
1666 No other entities are recognized or replaced. */
1668 html_quote_string (const char *s)
1674 /* Pass through the string, and count the new size.  The loop's own
   i++ already counts one byte for every input character, so the
   branches below add only the extra bytes an entity needs beyond
   that one (the text named in each trailing comment). */
1675 for (i = 0; *s; s++, i++)
1678 i += 4; /* `amp;' */
1679 else if (*s == '<' || *s == '>')
1680 i += 3; /* `lt;' and `gt;' */
1681 else if (*s == '\"')
1682 i += 5; /* `quot;' */
/* I now holds the length of the quoted text.  NOTE(review): the
   extra byte is presumably for the terminating '\0' -- confirm. */
1686 res = (char *)xmalloc (i + 1);
/* Second pass: write the quoted text into RES through P. */
1688 for (p = res; *s; s++)
/* `l' for `<' (as in `lt;'), `g' for `>' (as in `gt;'). */
1701 *p++ = (*s == '<' ? 'l' : 'g');
1728 /* Determine the width of the terminal we're running on. If that's
1729 not possible, return 0. */
1732 determine_screen_width (void)
1734 /* If there's a way to get the terminal size using POSIX
1735 tcgetattr(), somebody please tell me. */
1738 #else /* TIOCGWINSZ */
/* NOTE(review): the statement guarded by this check is not visible
   here; presumably width detection is skipped when output goes to a
   log file (opt.lfilename set) -- confirm. */
1742 if (opt.lfilename != NULL)
/* Query the window size of whatever stderr is attached to. */
1745 fd = fileno (stderr);
1746 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1747 return 0; /* most likely ENOTTY */
1750 #endif /* TIOCGWINSZ */
1753 /* Return a random number between 0 and MAX-1, inclusive.
1755 If MAX is greater than the value of RAND_MAX+1 on the system, the
1756 returned value will be in the range [0, RAND_MAX]. This may be
1757 fixed in a future release.
1759 The random number generator is seeded automatically the first time
1762 This uses rand() for portability. It has been suggested that
1763 random() offers better randomness, but this is not required for
1764 Wget, so I chose to go for simplicity and use rand
1768 random_number (int max)
/* Seed the generator from the current time. */
1776 srand (time (NULL));
1781 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1782 and enforce that assumption by masking other bits. */
1784 # define RAND_MAX 32767
1788 /* This covers the same range as rand() % max, but uses the high-order
1789 bits for better randomness on architectures where rand() is
1790 implemented using a simple congruential generator. */
/* Assuming rnd holds a value in [0, RAND_MAX] (TODO confirm against
   the assignment to rnd), rnd / (RAND_MAX + 1.0) lies in [0, 1), so
   the product lies in [0, max) and the cast below truncates it to
   0 .. max-1. */
1792 bounded = (double)max * rnd / (RAND_MAX + 1.0);
1793 return (int)bounded;
1797 /* A debugging function for checking whether an MD5 library works. */
1799 #include "gen-md5.h"
1802 debug_test_md5 (char *buf)
/* RAW receives the 16 bytes written by gen_md5_finish below. */
1804 unsigned char raw[16];
/* Two characters per byte of RAW make 32; the 33rd byte is presumably
   for the terminating '\0'.  Being static, RES is shared by all
   calls. */
1805 static char res[33];
1809 ALLOCA_MD5_CONTEXT (ctx);
/* Digest the string BUF, up to but not including its terminating
   '\0', and store the result in RAW. */
1812 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1813 gen_md5_finish (ctx, raw);
/* Emit two characters per byte: the high four bits first, then the
   low four bits. */
1820 *p2++ = XDIGIT_TO_xchar (*p1 >> 4);
1821 *p2++ = XDIGIT_TO_xchar (*p1 & 0xf);
1830 /* Implementation of run_with_timeout, a generic timeout handler for
1831 systems with Unix-like signal handling. */
1832 #ifdef USE_SIGNAL_TIMEOUT
1833 # ifdef HAVE_SIGSETJMP
/* A non-zero second argument makes sigsetjmp save the current signal
   mask, which siglongjmp then restores (see POSIX sigsetjmp). */
1834 # define SETJMP(env) sigsetjmp (env, 1)
/* Jump target shared between run_with_timeout and the SIGALRM
   handler below. */
1836 static sigjmp_buf run_with_timeout_env;
/* SIGALRM handler: unwind to the SETJMP point; the -1 becomes the
   non-zero value that SETJMP returns there. */
1839 abort_run_with_timeout (int sig)
1841 assert (sig == SIGALRM);
1842 siglongjmp (run_with_timeout_env, -1);
1844 # else /* not HAVE_SIGSETJMP */
1845 # define SETJMP(env) setjmp (env)
/* Jump target shared between run_with_timeout and the SIGALRM
   handler below. */
1847 static jmp_buf run_with_timeout_env;
/* SIGALRM handler for systems without sigsetjmp/siglongjmp. */
1850 abort_run_with_timeout (int sig)
1852 assert (sig == SIGALRM);
1853 /* We don't have siglongjmp to preserve the set of blocked signals;
1854 if we longjumped out of the handler at this point, SIGALRM would
1855 remain blocked. We must unblock it manually. */
/* NOTE(review): `int mask' is declared after the assert statement
   above.  Mixing declarations and code needs C99; a C89 compiler
   will reject it.  Moving the declaration to the top of the body
   would fix that. */
1856 int mask = siggetmask ();
/* Clear SIGALRM's bit in the copy of the blocked-signal mask. */
1857 mask &= ~sigmask(SIGALRM);
1860 /* Now it's safe to longjump. */
1861 longjmp (run_with_timeout_env, -1);
1863 # endif /* not HAVE_SIGSETJMP */
1864 #endif /* USE_SIGNAL_TIMEOUT */
1867 run_with_timeout (long timeout, void (*fun) (void *), void *arg)
1869 #ifndef USE_SIGNAL_TIMEOUT
1881 signal (SIGALRM, abort_run_with_timeout);
1882 if (SETJMP (run_with_timeout_env) != 0)
1884 /* Longjumped out of FUN with a timeout. */
1885 signal (SIGALRM, SIG_DFL);
1891 /* Preserve errno in case alarm() or signal() modifies it. */
1892 saved_errno = errno;
1894 signal (SIGALRM, SIG_DFL);
1895 errno = saved_errno;