1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 In addition, as a special exception, the Free Software Foundation
22 gives permission to link the code of its release of Wget with the
23 OpenSSL project's "OpenSSL" library (or with modified versions of it
24 that use the same license as the "OpenSSL" library), and distribute
25 the linked executables. You must obey the GNU General Public License
26 in all respects for all of the code used other than "OpenSSL". If you
27 modify this file, you may extend this exception to your version of the
28 file, but you are not obligated to do so. If you do not wish to do
29 so, delete this exception statement from your version. */
37 #else /* not HAVE_STRING_H */
39 #endif /* not HAVE_STRING_H */
40 #include <sys/types.h>
45 # include <sys/mman.h>
54 #ifdef HAVE_SYS_UTIME_H
55 # include <sys/utime.h>
59 # include <libc.h> /* for access() */
64 /* For TIOCGWINSZ and friends: */
65 #ifdef HAVE_SYS_IOCTL_H
66 # include <sys/ioctl.h>
72 /* Needed for run_with_timeout. */
73 #undef USE_SIGNAL_TIMEOUT
80 /* If sigsetjmp is a macro, configure won't pick it up. */
82 # define HAVE_SIGSETJMP
85 # ifdef HAVE_SIGSETJMP
86 # define USE_SIGNAL_TIMEOUT
89 # define USE_SIGNAL_TIMEOUT
102 /* This section implements several wrappers around the basic
103 allocation routines. This is done for two reasons: first, so that
104 the callers of these functions need not consistently check for
105 errors. If there is not enough virtual memory for running Wget,
106 something is seriously wrong, and Wget exits with an appropriate
109 The second reason why these are useful is that, if DEBUG_MALLOC is
110 defined, they also provide a handy (if crude) malloc debugging
111 interface that checks memory leaks. */
113 /* Croak the fatal memory error and bail out with non-zero exit
116 memfatal (const char *what)
118 /* Make sure we don't try to store part of the log line, and thus
120 log_set_save_context (0);
121 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
125 /* These functions end with _real because they need to be
126 distinguished from the debugging functions, and from the macros.
129 If memory debugging is not turned on, wget.h defines these:
131 #define xmalloc xmalloc_real
132 #define xrealloc xrealloc_real
133 #define xstrdup xstrdup_real
136 In case of memory debugging, the definitions are a bit more
137 complex, because we want to provide more information, *and* we want
138 to call the debugging code. (The former is the reason why xmalloc
139 and friends need to be macros in the first place.) Then it looks
142 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
143 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
144 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
145 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
147 Each of the *_debug function does its magic and calls the real one. */
150 # define STATIC_IF_DEBUG static
152 # define STATIC_IF_DEBUG
155 STATIC_IF_DEBUG void *
156 xmalloc_real (size_t size)
158 void *ptr = malloc (size);
164 STATIC_IF_DEBUG void *
165 xrealloc_real (void *ptr, size_t newsize)
169 /* Not all Un*xes have the feature of realloc() that calling it with
170 a NULL-pointer is the same as malloc(), but it is easy to
173 newptr = realloc (ptr, newsize);
175 newptr = malloc (newsize);
177 memfatal ("realloc");
181 STATIC_IF_DEBUG char *
182 xstrdup_real (const char *s)
188 copy = malloc (l + 1);
191 memcpy (copy, s, l + 1);
192 #else /* HAVE_STRDUP */
196 #endif /* HAVE_STRDUP */
203 /* Crude home-grown routines for debugging some malloc-related
206 * Counting the number of malloc and free invocations, and reporting
207 the "balance", i.e. how many times more malloc was called than it
208 was the case with free.
210 * Making malloc store its entry into a simple array and free remove
211 stuff from that array. At the end, print the pointers which have
212 not been freed, along with the source file and the line number.
213 This also has the side-effect of detecting freeing memory that
216 Note that this kind of memory leak checking strongly depends on
217 every malloc() being followed by a free(), even if the program is
218 about to finish. Wget is careful to free the data structure it
219 allocated in init.c. */
221 static int malloc_count, free_count;
227 } malloc_debug[100000];
229 /* Both register_ptr and unregister_ptr take O(n) operations to run,
230 which can be a real problem. It would be nice to use a hash table
231 for malloc_debug, but the functions in hash.c are not suitable
232 because they can call malloc() themselves. Maybe it would work if
233 the hash table were preallocated to a huge size, and if we set the
234 rehash threshold to 1.0. */
236 /* Register PTR in malloc_debug. Abort if this is not possible
237 (presumably due to the number of current allocations exceeding the
238 size of malloc_debug.) */
241 register_ptr (void *ptr, const char *file, int line)
244 for (i = 0; i < countof (malloc_debug); i++)
245 if (malloc_debug[i].ptr == NULL)
247 malloc_debug[i].ptr = ptr;
248 malloc_debug[i].file = file;
249 malloc_debug[i].line = line;
255 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
256 malloc_debug. (This catches calling free() with a bogus pointer.) */
259 unregister_ptr (void *ptr)
262 for (i = 0; i < countof (malloc_debug); i++)
263 if (malloc_debug[i].ptr == ptr)
265 malloc_debug[i].ptr = NULL;
271 /* Print the malloc debug stats that can be gathered from the above
272 information. Currently this is the count of mallocs, frees, the
273 difference between the two, and the dump of the contents of
274 malloc_debug. The last part are the memory leaks. */
277 print_malloc_debug_stats (void)
280 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
281 malloc_count, free_count, malloc_count - free_count);
282 for (i = 0; i < countof (malloc_debug); i++)
283 if (malloc_debug[i].ptr != NULL)
284 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
285 malloc_debug[i].file, malloc_debug[i].line);
289 xmalloc_debug (size_t size, const char *source_file, int source_line)
291 void *ptr = xmalloc_real (size);
293 register_ptr (ptr, source_file, source_line);
298 xfree_debug (void *ptr, const char *source_file, int source_line)
300 assert (ptr != NULL);
302 unregister_ptr (ptr);
307 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
309 void *newptr = xrealloc_real (ptr, newsize);
313 register_ptr (newptr, source_file, source_line);
315 else if (newptr != ptr)
317 unregister_ptr (ptr);
318 register_ptr (newptr, source_file, source_line);
324 xstrdup_debug (const char *s, const char *source_file, int source_line)
326 char *copy = xstrdup_real (s);
328 register_ptr (copy, source_file, source_line);
332 #endif /* DEBUG_MALLOC */
334 /* Utility function: like xstrdup(), but also lowercases S. */
337 xstrdup_lower (const char *s)
339 char *copy = xstrdup (s);
346 /* Return a count of how many times CHR occurs in STRING. */
349 count_char (const char *string, char chr)
353 for (p = string; *p; p++)
359 /* Copy the string formed by two pointers (one on the beginning, other
360 on the char after the last char) to a new, malloc-ed location.
363 strdupdelim (const char *beg, const char *end)
365 char *res = (char *)xmalloc (end - beg + 1);
366 memcpy (res, beg, end - beg);
367 res[end - beg] = '\0';
371 /* Parse a string containing comma-separated elements, and return a
372 vector of char pointers with the elements. Spaces following the
373 commas are ignored. */
375 sepstring (const char *s)
389 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
390 res[i] = strdupdelim (p, s);
393 /* Skip the blanks following the ','. */
401 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
402 res[i] = strdupdelim (p, s);
407 /* Return pointer to a static char[] buffer in which zero-terminated
408 string-representation of TM (in form hh:mm:ss) is printed.
410 If TM is non-NULL, the current time-in-seconds will be stored
413 (#### This is misleading: one would expect TM would be used instead
414 of the current time in that case. This design was probably
415 influenced by the design time(2), and should be changed at some
416 points. No callers use non-NULL TM anyway.) */
419 time_str (time_t *tm)
421 static char output[15];
423 time_t secs = time (tm);
427 /* In case of error, return the empty string. Maybe we should
428 just abort if this happens? */
432 ptm = localtime (&secs);
433 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
437 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
440 datetime_str (time_t *tm)
442 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
444 time_t secs = time (tm);
448 /* In case of error, return the empty string. Maybe we should
449 just abort if this happens? */
453 ptm = localtime (&secs);
454 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
455 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
456 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
460 /* The Windows versions of the following two functions are defined in
465 fork_to_background (void)
468 /* Whether we arrange our own version of opt.lfilename here. */
473 opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
485 /* parent, no error */
486 printf (_("Continuing in background, pid %d.\n"), (int)pid);
488 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
489 exit (0); /* #### should we use _exit()? */
492 /* child: give up the privileges and keep running. */
494 freopen ("/dev/null", "r", stdin);
495 freopen ("/dev/null", "w", stdout);
496 freopen ("/dev/null", "w", stderr);
498 #endif /* not WINDOWS */
500 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
501 specified with TM. */
503 touch (const char *file, time_t tm)
505 #ifdef HAVE_STRUCT_UTIMBUF
506 struct utimbuf times;
507 times.actime = times.modtime = tm;
510 times[0] = times[1] = tm;
513 if (utime (file, ×) == -1)
514 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
517 /* Checks if FILE is a symbolic link, and removes it if it is. Does
518 nothing under MS-Windows. */
520 remove_link (const char *file)
525 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
527 DEBUGP (("Unlinking %s (symlink).\n", file));
530 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
531 file, strerror (errno));
536 /* Does FILENAME exist? This is quite a lousy implementation, since
537 it supplies no error codes -- only a yes-or-no answer. Thus it
538 will return that a file does not exist if, e.g., the directory is
539 unreadable. I don't mind it too much currently, though. The
540 proper way should, of course, be to have a third, error state,
541 other than true/false, but that would introduce uncalled-for
542 additional complexity to the callers. */
544 file_exists_p (const char *filename)
547 return access (filename, F_OK) >= 0;
550 return stat (filename, &buf) >= 0;
554 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
555 Returns 0 on error. */
557 file_non_directory_p (const char *path)
560 /* Use lstat() rather than stat() so that symbolic links pointing to
561 directories can be identified correctly. */
562 if (lstat (path, &buf) != 0)
564 return S_ISDIR (buf.st_mode) ? 0 : 1;
567 /* Return the size of file named by FILENAME, or -1 if it cannot be
568 opened or seeked into. */
570 file_size (const char *filename)
573 /* We use fseek rather than stat to determine the file size because
574 that way we can also verify whether the file is readable.
575 Inspired by the POST patch by Arnaud Wylie. */
576 FILE *fp = fopen (filename, "rb");
577 fseek (fp, 0, SEEK_END);
583 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
584 doesn't exist is found. Return a freshly allocated copy of the
588 unique_name_1 (const char *prefix)
591 int plen = strlen (prefix);
592 char *template = (char *)alloca (plen + 1 + 24);
593 char *template_tail = template + plen;
595 memcpy (template, prefix, plen);
596 *template_tail++ = '.';
599 number_to_string (template_tail, count++);
600 while (file_exists_p (template));
602 return xstrdup (template);
605 /* Return a unique file name, based on FILE.
607 More precisely, if FILE doesn't exist, it is returned unmodified.
608 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
609 file name that doesn't exist is returned.
611 The resulting file is not created, only verified that it didn't
612 exist at the point in time when the function was called.
613 Therefore, where security matters, don't rely that the file created
614 by this function exists until you open it with O_EXCL or
617 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
618 string. Otherwise, it may return FILE if the file doesn't exist
619 (and therefore doesn't need changing). */
622 unique_name (const char *file, int allow_passthrough)
624 /* If the FILE itself doesn't exist, return it without
626 if (!file_exists_p (file))
627 return allow_passthrough ? (char *)file : xstrdup (file);
629 /* Otherwise, find a numeric suffix that results in unused file name
631 return unique_name_1 (file);
634 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
635 are missing, create them first. In case any mkdir() call fails,
636 return its error status. Returns 0 on successful completion.
638 The behaviour of this function should be identical to the behaviour
639 of `mkdir -p' on systems where mkdir supports the `-p' option. */
641 make_directory (const char *directory)
648 /* Make a copy of dir, to be able to write to it. Otherwise, the
649 function is unsafe if called with a read-only char *argument. */
650 STRDUP_ALLOCA (dir, directory);
652 /* If the first character of dir is '/', skip it (and thus enable
653 creation of absolute-pathname directories. */
654 for (i = (*dir == '/'); 1; ++i)
656 for (; dir[i] && dir[i] != '/'; i++)
661 /* Check whether the directory already exists. Allow creation of
662 of intermediate directories to fail, as the initial path components
663 are not necessarily directories! */
664 if (!file_exists_p (dir))
665 ret = mkdir (dir, 0777);
676 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
677 should be a file name.
679 file_merge("/foo/bar", "baz") => "/foo/baz"
680 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
681 file_merge("foo", "bar") => "bar"
683 In other words, it's a simpler and gentler version of uri_merge_1. */
686 file_merge (const char *base, const char *file)
689 const char *cut = (const char *)strrchr (base, '/');
692 return xstrdup (file);
694 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
695 memcpy (result, base, cut - base);
696 result[cut - base] = '/';
697 strcpy (result + (cut - base) + 1, file);
702 static int in_acclist PARAMS ((const char *const *, const char *, int));
704 /* Determine whether a file is acceptable to be followed, according to
705 lists of patterns to accept/reject. */
707 acceptable (const char *s)
711 while (l && s[l] != '/')
718 return (in_acclist ((const char *const *)opt.accepts, s, 1)
719 && !in_acclist ((const char *const *)opt.rejects, s, 1));
721 return in_acclist ((const char *const *)opt.accepts, s, 1);
723 else if (opt.rejects)
724 return !in_acclist ((const char *const *)opt.rejects, s, 1);
728 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
729 `/something', frontcmp() will return 1 only if S2 begins with
730 `/something'. Otherwise, 0 is returned. */
732 frontcmp (const char *s1, const char *s2)
734 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
738 /* Iterate through STRLIST, and return the first element that matches
739 S, through wildcards or front comparison (as appropriate). */
741 proclist (char **strlist, const char *s, enum accd flags)
745 for (x = strlist; *x; x++)
746 if (has_wildcards_p (*x))
748 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
753 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
760 /* Returns whether DIRECTORY is acceptable for download, wrt the
761 include/exclude lists.
763 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
764 and absolute paths may be freely intermixed. */
766 accdir (const char *directory, enum accd flags)
768 /* Remove starting '/'. */
769 if (flags & ALLABS && *directory == '/')
773 if (!proclist (opt.includes, directory, flags))
778 if (proclist (opt.excludes, directory, flags))
784 /* Return non-zero if STRING ends with TAIL. For instance:
786 match_tail ("abc", "bc", 0) -> 1
787 match_tail ("abc", "ab", 0) -> 0
788 match_tail ("abc", "abc", 0) -> 1
790 If FOLD_CASE_P is non-zero, the comparison will be
794 match_tail (const char *string, const char *tail, int fold_case_p)
798 /* We want this to be fast, so we code two loops, one with
799 case-folding, one without. */
803 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
804 if (string[i] != tail[j])
809 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
810 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
814 /* If the tail was exhausted, the match was succesful. */
821 /* Checks whether string S matches each element of ACCEPTS. A list
822 element are matched either with fnmatch() or match_tail(),
823 according to whether the element contains wildcards or not.
825 If the BACKWARD is 0, don't do backward comparison -- just compare
828 in_acclist (const char *const *accepts, const char *s, int backward)
830 for (; *accepts; accepts++)
832 if (has_wildcards_p (*accepts))
834 /* fnmatch returns 0 if the pattern *does* match the
836 if (fnmatch (*accepts, s, 0) == 0)
843 if (match_tail (s, *accepts, 0))
848 if (!strcmp (s, *accepts))
856 /* Return the location of STR's suffix (file extension). Examples:
857 suffix ("foo.bar") -> "bar"
858 suffix ("foo.bar.baz") -> "baz"
859 suffix ("/foo/bar") -> NULL
860 suffix ("/foo.bar/baz") -> NULL */
862 suffix (const char *str)
866 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
870 return (char *)str + i;
875 /* Return non-zero if FNAME ends with a typical HTML suffix. The
876 following (case-insensitive) suffixes are presumed to be HTML files:
880 ?html (`?' matches one character)
882 #### CAVEAT. This is not necessarily a good indication that FNAME
883 refers to a file that contains HTML! */
885 has_html_suffix_p (const char *fname)
889 if ((suf = suffix (fname)) == NULL)
891 if (!strcasecmp (suf, "html"))
893 if (!strcasecmp (suf, "htm"))
895 if (suf[0] && !strcasecmp (suf + 1, "html"))
900 /* Read a line from FP and return the pointer to freshly allocated
901 storage. The stoarage space is obtained through malloc() and
902 should be freed with free() when it is no longer needed.
904 The length of the line is not limited, except by available memory.
905 The newline character at the end of line is retained. The line is
906 terminated with a zero character.
908 After end-of-file is encountered without anything being read, NULL
909 is returned. NULL is also returned on error. To distinguish
910 between these two cases, use the stdio function ferror(). */
913 read_whole_line (FILE *fp)
917 char *line = (char *)xmalloc (bufsize);
919 while (fgets (line + length, bufsize - length, fp))
921 length += strlen (line + length);
923 /* Possible for example when reading from a binary file where
924 a line begins with \0. */
927 if (line[length - 1] == '\n')
930 /* fgets() guarantees to read the whole line, or to use up the
931 space we've given it. We can double the buffer
934 line = xrealloc (line, bufsize);
936 if (length == 0 || ferror (fp))
941 if (length + 1 < bufsize)
942 /* Relieve the memory from our exponential greediness. We say
943 `length + 1' because the terminating \0 is not included in
944 LENGTH. We don't need to zero-terminate the string ourselves,
945 though, because fgets() does that. */
946 line = xrealloc (line, length + 1);
950 /* Read FILE into memory. A pointer to `struct file_memory' are
951 returned; use struct element `content' to access file contents, and
952 the element `length' to know the file length. `content' is *not*
953 zero-terminated, and you should *not* read or write beyond the [0,
954 length) range of characters.
956 After you are done with the file contents, call read_file_free to
959 Depending on the operating system and the type of file that is
960 being read, read_file() either mmap's the file into memory, or
961 reads the file into the core using read().
963 If file is named "-", fileno(stdin) is used for reading instead.
964 If you want to read from a real file named "-", use "./-" instead. */
967 read_file (const char *file)
970 struct file_memory *fm;
972 int inhibit_close = 0;
974 /* Some magic in the finest tradition of Perl and its kin: if FILE
975 is "-", just use stdin. */
980 /* Note that we don't inhibit mmap() in this case. If stdin is
981 redirected from a regular file, mmap() will still work. */
984 fd = open (file, O_RDONLY);
987 fm = xmalloc (sizeof (struct file_memory));
992 if (fstat (fd, &buf) < 0)
994 fm->length = buf.st_size;
995 /* NOTE: As far as I know, the callers of this function never
996 modify the file text. Relying on this would enable us to
997 specify PROT_READ and MAP_SHARED for a marginal gain in
998 efficiency, but at some cost to generality. */
999 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1000 MAP_PRIVATE, fd, 0);
1001 if (fm->content == (char *)MAP_FAILED)
1011 /* The most common reason why mmap() fails is that FD does not point
1012 to a plain file. However, it's also possible that mmap() doesn't
1013 work for a particular type of file. Therefore, whenever mmap()
1014 fails, we just fall back to the regular method. */
1015 #endif /* HAVE_MMAP */
1018 size = 512; /* number of bytes fm->contents can
1019 hold at any given time. */
1020 fm->content = xmalloc (size);
1024 if (fm->length > size / 2)
1026 /* #### I'm not sure whether the whole exponential-growth
1027 thing makes sense with kernel read. On Linux at least,
1028 read() refuses to read more than 4K from a file at a
1029 single chunk anyway. But other Unixes might optimize it
1030 better, and it doesn't *hurt* anything, so I'm leaving
1033 /* Normally, we grow SIZE exponentially to make the number
1034 of calls to read() and realloc() logarithmic in relation
1035 to file size. However, read() can read an amount of data
1036 smaller than requested, and it would be unreasonably to
1037 double SIZE every time *something* was read. Therefore,
1038 we double SIZE only when the length exceeds half of the
1039 entire allocated size. */
1041 fm->content = xrealloc (fm->content, size);
1043 nread = read (fd, fm->content + fm->length, size - fm->length);
1045 /* Successful read. */
1046 fm->length += nread;
1056 if (size > fm->length && fm->length != 0)
1057 /* Due to exponential growth of fm->content, the allocated region
1058 might be much larger than what is actually needed. */
1059 fm->content = xrealloc (fm->content, fm->length);
1066 xfree (fm->content);
1071 /* Release the resources held by FM. Specifically, this calls
1072 munmap() or xfree() on fm->content, depending whether mmap or
1073 malloc/read were used to read in the file. It also frees the
1074 memory needed to hold the FM structure itself. */
1077 read_file_free (struct file_memory *fm)
1082 munmap (fm->content, fm->length);
1087 xfree (fm->content);
1092 /* Free the pointers in a NULL-terminated vector of pointers, then
1093 free the pointer itself. */
1095 free_vec (char **vec)
1106 /* Append vector V2 to vector V1. The function frees V2 and
1107 reallocates V1 (thus you may not use the contents of neither
1108 pointer after the call). If V1 is NULL, V2 is returned. */
1110 merge_vecs (char **v1, char **v2)
1120 /* To avoid j == 0 */
1125 for (i = 0; v1[i]; i++);
1127 for (j = 0; v2[j]; j++);
1128 /* Reallocate v1. */
1129 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1130 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1135 /* A set of simple-minded routines to store strings in a linked list.
1136 This used to also be used for searching, but now we have hash
1139 /* It's a shame that these simple things like linked lists and hash
1140 tables (see hash.c) need to be implemented over and over again. It
1141 would be nice to be able to use the routines from glib -- see
1142 www.gtk.org for details. However, that would make Wget depend on
1143 glib, and I want to avoid dependencies to external libraries for
1144 reasons of convenience and portability (I suspect Wget is more
1145 portable than anything ever written for Gnome). */
1147 /* Append an element to the list. If the list has a huge number of
1148 elements, this can get slow because it has to find the list's
1149 ending. If you think you have to call slist_append in a loop,
1150 think about calling slist_prepend() followed by slist_nreverse(). */
1153 slist_append (slist *l, const char *s)
1155 slist *newel = (slist *)xmalloc (sizeof (slist));
1158 newel->string = xstrdup (s);
1163 /* Find the last element. */
1170 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1173 slist_prepend (slist *l, const char *s)
1175 slist *newel = (slist *)xmalloc (sizeof (slist));
1176 newel->string = xstrdup (s);
1181 /* Destructively reverse L. */
1184 slist_nreverse (slist *l)
1189 slist *next = l->next;
1197 /* Is there a specific entry in the list? */
1199 slist_contains (slist *l, const char *s)
1201 for (; l; l = l->next)
1202 if (!strcmp (l->string, s))
1207 /* Free the whole slist. */
1209 slist_free (slist *l)
1220 /* Sometimes it's useful to create "sets" of strings, i.e. special
1221 hash tables where you want to store strings as keys and merely
1222 query for their existence. Here is a set of utility routines that
1223 makes that transparent. */
1226 string_set_add (struct hash_table *ht, const char *s)
1228 /* First check whether the set element already exists. If it does,
1229 do nothing so that we don't have to free() the old element and
1230 then strdup() a new one. */
1231 if (hash_table_contains (ht, s))
1234 /* We use "1" as value. It provides us a useful and clear arbitrary
1235 value, and it consumes no memory -- the pointers to the same
1236 string "1" will be shared by all the key-value pairs in all `set'
1238 hash_table_put (ht, xstrdup (s), "1");
1241 /* Synonym for hash_table_contains... */
1244 string_set_contains (struct hash_table *ht, const char *s)
1246 return hash_table_contains (ht, s);
1250 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1257 string_set_free (struct hash_table *ht)
1259 hash_table_map (ht, string_set_free_mapper, NULL);
1260 hash_table_destroy (ht);
1264 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1271 /* Another utility function: call free() on all keys and values of HT. */
1274 free_keys_and_values (struct hash_table *ht)
1276 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1280 /* Engine for legible and legible_very_long; this function works on
1284 legible_1 (const char *repr)
1286 static char outbuf[128];
1291 /* Reset the pointers. */
1294 /* If the number is negative, shift the pointers. */
1300 /* How many digits before the first separator? */
1301 mod = strlen (inptr) % 3;
1303 for (i = 0; i < mod; i++)
1304 *outptr++ = inptr[i];
1305 /* Now insert the rest of them, putting separator before every
1307 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1309 if (i % 3 == 0 && i1 != 0)
1311 *outptr++ = inptr[i1];
1313 /* Zero-terminate the string. */
1318 /* Legible -- return a static pointer to the legibly printed long. */
1323 /* Print the number into the buffer. */
1324 number_to_string (inbuf, l);
1325 return legible_1 (inbuf);
1328 /* Write a string representation of NUMBER into the provided buffer.
1329 We cannot use sprintf() because we cannot be sure whether the
1330 platform supports printing of what we chose for VERY_LONG_TYPE.
1332 Example: Gcc supports `long long' under many platforms, but on many
1333 of those the native libc knows nothing of it and therefore cannot
1336 How long BUFFER needs to be depends on the platform and the content
1337 of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24
1338 bytes are sufficient. Using more might be a good idea.
1340 This function does not go through the hoops that long_to_string
1341 goes to because it doesn't aspire to be fast. (It's called perhaps
1342 once in a Wget run.) */
1345 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1350 /* Print the number backwards... */
1353 buffer[i++] = '0' + number % 10;
1358 /* ...and reverse the order of the digits. */
1359 for (j = 0; j < i / 2; j++)
1362 buffer[j] = buffer[i - 1 - j];
1363 buffer[i - 1 - j] = c;
1368 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1370 legible_very_long (VERY_LONG_TYPE l)
1373 /* Print the number into the buffer. */
1374 very_long_to_string (inbuf, l);
1375 return legible_1 (inbuf);
1378 /* Count the digits in a (long) integer. */
1380 numdigit (long number)
1388 while ((number /= 10) > 0)
1393 /* A half-assed implementation of INT_MAX on machines that don't
1394 bother to define one. */
1396 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
1399 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1400 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1402 #define DIGITS_1(figure) ONE_DIGIT (figure)
1403 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1404 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1405 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1406 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1407 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1408 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1409 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1410 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1411 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1413 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1415 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1416 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1417 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1418 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1419 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1420 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1421 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1422 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1423 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1425 /* Print NUMBER to BUFFER in base 10. This should be completely
1426 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1428 The speedup may make a difference in programs that frequently
1429 convert numbers to strings. Some implementations of sprintf,
1430 particularly the one in GNU libc, have been known to be extremely
1431 slow compared to this function.
1433 Return the pointer to the location where the terminating zero was
1434 printed. (Equivalent to calling buffer+strlen(buffer) after the
1437 BUFFER should be big enough to accept as many bytes as you expect
1438 the number to take up. On machines with 64-bit longs the maximum
1439 needed size is 24 bytes. That includes the digits needed for the
1440 largest 64-bit number, the `-' sign in case it's negative, and the
1441 terminating '\0'. */
1444 number_to_string (char *buffer, long number)
1449 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1450 /* We are running in a strange or misconfigured environment. Let
1451 sprintf cope with it. */
1452 sprintf (buffer, "%ld", n);
1453 p += strlen (buffer);
1454 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1460 /* We cannot print a '-' and assign -n to n because -n would
1461 overflow. Let sprintf deal with this border case. */
1462 sprintf (buffer, "%ld", n);
1463 p += strlen (buffer);
1471 if (n < 10) { DIGITS_1 (1); }
1472 else if (n < 100) { DIGITS_2 (10); }
1473 else if (n < 1000) { DIGITS_3 (100); }
1474 else if (n < 10000) { DIGITS_4 (1000); }
1475 else if (n < 100000) { DIGITS_5 (10000); }
1476 else if (n < 1000000) { DIGITS_6 (100000); }
1477 else if (n < 10000000) { DIGITS_7 (1000000); }
1478 else if (n < 100000000) { DIGITS_8 (10000000); }
1479 else if (n < 1000000000) { DIGITS_9 (100000000); }
1480 #if SIZEOF_LONG == 4
1481 /* ``if (1)'' serves only to preserve editor indentation. */
1482 else if (1) { DIGITS_10 (1000000000); }
1483 #else /* SIZEOF_LONG != 4 */
1484 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1485 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1486 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1487 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1488 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1489 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1490 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1491 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1492 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1493 else { DIGITS_19 (1000000000000000000L); }
1494 #endif /* SIZEOF_LONG != 4 */
1497 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1503 #undef ONE_DIGIT_ADVANCE
1525 /* Support for timers. */
1527 #undef TIMER_WINDOWS
1528 #undef TIMER_GETTIMEOFDAY
1531 /* Depending on the OS and availability of gettimeofday(), one and
1532 only one of the above constants will be defined. Virtually all
1533 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1534 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1535 non-Windows systems without gettimeofday.
1537 #### Perhaps we should also support ftime(), which exists on old
1538 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1539 C, if memory serves me.) */
1542 # define TIMER_WINDOWS
1543 #else /* not WINDOWS */
1544 # ifdef HAVE_GETTIMEOFDAY
1545 # define TIMER_GETTIMEOFDAY
1549 #endif /* not WINDOWS */
1551 #ifdef TIMER_GETTIMEOFDAY
1552 typedef struct timeval wget_sys_time;
1556 typedef time_t wget_sys_time;
1559 #ifdef TIMER_WINDOWS
1560 typedef ULARGE_INTEGER wget_sys_time;
1564 /* The starting point in time which, subtracted from the current
1565 time, yields elapsed time. */
1566 wget_sys_time start;
1568 /* The most recent elapsed time, calculated by wtimer_elapsed().
1569 Measured in milliseconds. */
1570 double elapsed_last;
1572 /* Approximately, the time elapsed between the true start of the
1573 measurement and the time represented by START. */
1574 double elapsed_pre_start;
1577 /* Allocate a timer. It is not legal to do anything with a freshly
1578 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1581 wtimer_allocate (void)
1583 struct wget_timer *wt =
1584 (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1588 /* Allocate a new timer and reset it. Return the new timer. */
1593 struct wget_timer *wt = wtimer_allocate ();
1598 /* Free the resources associated with the timer. Its further use is
1602 wtimer_delete (struct wget_timer *wt)
1607 /* Store system time to WST. */
1610 wtimer_sys_set (wget_sys_time *wst)
1612 #ifdef TIMER_GETTIMEOFDAY
1613 gettimeofday (wst, NULL);
1620 #ifdef TIMER_WINDOWS
1621 /* We use GetSystemTime to get the elapsed time. MSDN warns that
1622 system clock adjustments can skew the output of GetSystemTime
1623 when used as a timer and gives preference to GetTickCount and
1624 high-resolution timers. But GetTickCount can overflow, and hires
1625 timers are typically used for profiling, not for regular time
1626 measurement. Since we handle clock skew anyway, we just use
1630 GetSystemTime (&st);
1632 /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1633 FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1634 arithmetic on that. */
1635 SystemTimeToFileTime (&st, &ft);
1636 wst->HighPart = ft.dwHighDateTime;
1637 wst->LowPart = ft.dwLowDateTime;
1641 /* Reset timer WT. This establishes the starting point from which
1642 wtimer_elapsed() will return the number of elapsed
1643 milliseconds. It is allowed to reset a previously used timer. */
1646 wtimer_reset (struct wget_timer *wt)
1648 /* Set the start time to the current time. */
1649 wtimer_sys_set (&wt->start);
1650 wt->elapsed_last = 0;
1651 wt->elapsed_pre_start = 0;
1655 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1657 #ifdef TIMER_GETTIMEOFDAY
1658 return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1659 + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1663 return 1000 * (*wst1 - *wst2);
1667 /* VC++ 6 doesn't support direct cast of uint64 to double. To work
1668 around this, we subtract, then convert to signed, then finally to
1670 return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1674 /* Return the number of milliseconds elapsed since the timer was last
1675 reset. It is allowed to call this function more than once to get
1676 increasingly higher elapsed values. These timers handle clock
1680 wtimer_elapsed (struct wget_timer *wt)
1685 wtimer_sys_set (&now);
1686 elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1688 /* Ideally we'd just return the difference between NOW and
1689 wt->start. However, the system timer can be set back, and we
1690 could return a value smaller than when we were last called, even
1691 a negative value. Both of these would confuse the callers, which
1692 expect us to return monotonically nondecreasing values.
1694 Therefore: if ELAPSED is smaller than its previous known value,
1695 we reset wt->start to the current time and effectively start
1696 measuring from this point. But since we don't want the elapsed
1697 value to start from zero, we set elapsed_pre_start to the last
1698 elapsed time and increment all future calculations by that
1701 if (elapsed < wt->elapsed_last)
1704 wt->elapsed_pre_start = wt->elapsed_last;
1705 elapsed = wt->elapsed_last;
1708 wt->elapsed_last = elapsed;
1712 /* Return the assessed granularity of the timer implementation, in
1713 milliseconds. This is used by code that tries to substitute a
1714 better value for timers that have returned zero. */
1717 wtimer_granularity (void)
1719 #ifdef TIMER_GETTIMEOFDAY
1720 /* Granularity of gettimeofday varies wildly between architectures.
1721 However, it appears that on modern machines it tends to be better
1722 than 1ms. Assume 100 usecs. (Perhaps the configure process
1723 could actually measure this?) */
1728 /* This is clear. */
1732 #ifdef TIMER_WINDOWS
1733 /* According to MSDN, GetSystemTime returns a broken-down time
1734 structure the smallest member of which are milliseconds. */
1739 /* This should probably be at a better place, but it doesn't really
1740 fit into html-parse.c. */
1742 /* The function returns the pointer to the malloc-ed quoted version of
1743 string s. It will recognize and quote numeric and special graphic
1744 entities, as per RFC1866:
1752 No other entities are recognized or replaced. */
1754 html_quote_string (const char *s)
1760 /* Pass through the string, and count the new size. */
1761 for (i = 0; *s; s++, i++)
1764 i += 4; /* `amp;' */
1765 else if (*s == '<' || *s == '>')
1766 i += 3; /* `lt;' and `gt;' */
1767 else if (*s == '\"')
1768 i += 5; /* `quot;' */
1772 res = (char *)xmalloc (i + 1);
1774 for (p = res; *s; s++)
1787 *p++ = (*s == '<' ? 'l' : 'g');
1814 /* Determine the width of the terminal we're running on. If that's
1815 not possible, return 0. */
1818 determine_screen_width (void)
1820 /* If there's a way to get the terminal size using POSIX
1821 tcgetattr(), somebody please tell me. */
1824 #else /* TIOCGWINSZ */
1828 if (opt.lfilename != NULL)
1831 fd = fileno (stderr);
1832 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1833 return 0; /* most likely ENOTTY */
1836 #endif /* TIOCGWINSZ */
1839 /* Return a random number between 0 and MAX-1, inclusive.
1841 If MAX is greater than the value of RAND_MAX+1 on the system, the
1842 returned value will be in the range [0, RAND_MAX]. This may be
1843 fixed in a future release.
1845 The random number generator is seeded automatically the first time
1848 This uses rand() for portability. It has been suggested that
1849 random() offers better randomness, but this is not required for
1850 Wget, so I chose to go for simplicity and use rand
1854 random_number (int max)
1862 srand (time (NULL));
1867 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1868 and enforce that assumption by masking other bits. */
1870 # define RAND_MAX 32767
1874 /* This is equivalent to rand() % max, but uses the high-order bits
1875 for better randomness on architecture where rand() is implemented
1876 using a simple congruential generator. */
1878 bounded = (double)max * rnd / (RAND_MAX + 1.0);
1879 return (int)bounded;
1882 /* Return a random uniformly distributed floating point number in the
1883 [0, 1) range. The precision of returned numbers is 9 digits.
1885 Modify this to use erand48() where available! */
1890 /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1891 sure it's greater than 1000. */
1892 int rnd1 = random_number (1000);
1893 int rnd2 = random_number (1000);
1894 int rnd3 = random_number (1000);
1895 return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1899 /* A debugging function for checking whether an MD5 library works. */
1901 #include "gen-md5.h"
1904 debug_test_md5 (char *buf)
1906 unsigned char raw[16];
1907 static char res[33];
1911 ALLOCA_MD5_CONTEXT (ctx);
1914 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1915 gen_md5_finish (ctx, raw);
1922 *p2++ = XNUM_TO_digit (*p1 >> 4);
1923 *p2++ = XNUM_TO_digit (*p1 & 0xf);
1932 /* Implementation of run_with_timeout, a generic timeout handler for
1933 systems with Unix-like signal handling. */
1934 #ifdef USE_SIGNAL_TIMEOUT
1935 # ifdef HAVE_SIGSETJMP
1936 # define SETJMP(env) sigsetjmp (env, 1)
1938 static sigjmp_buf run_with_timeout_env;
1941 abort_run_with_timeout (int sig)
1943 assert (sig == SIGALRM);
1944 siglongjmp (run_with_timeout_env, -1);
1946 # else /* not HAVE_SIGSETJMP */
1947 # define SETJMP(env) setjmp (env)
1949 static jmp_buf run_with_timeout_env;
1952 abort_run_with_timeout (int sig)
1954 assert (sig == SIGALRM);
1955 /* We don't have siglongjmp to preserve the set of blocked signals;
1956 if we longjumped out of the handler at this point, SIGALRM would
1957 remain blocked. We must unblock it manually. */
1958 int mask = siggetmask ();
1959 mask &= ~sigmask(SIGALRM);
1962 /* Now it's safe to longjump. */
1963 longjmp (run_with_timeout_env, -1);
1965 # endif /* not HAVE_SIGSETJMP */
1966 #endif /* USE_SIGNAL_TIMEOUT */
1969 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1971 #ifndef USE_SIGNAL_TIMEOUT
1983 /* Calling alarm() rounds TIMEOUT. If it is smaller than 1, round
1984 it to 1, not to 0, because alarm(0) means "never deliver the
1985 alarm", i.e. "wait forever", which is not what someone who
1986 specifies a 0.5s timeout would expect. */
1990 signal (SIGALRM, abort_run_with_timeout);
1991 if (SETJMP (run_with_timeout_env) != 0)
1993 /* Longjumped out of FUN with a timeout. */
1994 signal (SIGALRM, SIG_DFL);
1997 alarm ((int) timeout);
2000 /* Preserve errno in case alarm() or signal() modifies it. */
2001 saved_errno = errno;
2003 signal (SIGALRM, SIG_DFL);
2004 errno = saved_errno;