1 /* Various utility functions.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
36 #else /* not HAVE_STRING_H */
38 #endif /* not HAVE_STRING_H */
39 #include <sys/types.h>
44 # include <sys/mman.h>
55 #ifdef HAVE_SYS_UTIME_H
56 # include <sys/utime.h>
60 # include <libc.h> /* for access() */
65 /* For TIOCGWINSZ and friends: */
66 #ifdef HAVE_SYS_IOCTL_H
67 # include <sys/ioctl.h>
73 /* Needed for run_with_timeout. */
74 #undef USE_SIGNAL_TIMEOUT
82 #ifndef HAVE_SIGSETJMP
83 /* If sigsetjmp is a macro, configure won't pick it up. */
85 # define HAVE_SIGSETJMP
90 # ifdef HAVE_SIGSETJMP
91 # define USE_SIGNAL_TIMEOUT
94 # define USE_SIGNAL_TIMEOUT
106 /* Utility function: like xstrdup(), but also lowercases S. */
109 xstrdup_lower (const char *s)
111 char *copy = xstrdup (s);
118 /* Return a count of how many times CHR occurs in STRING. */
121 count_char (const char *string, char chr)
125 for (p = string; *p; p++)
131 /* Copy the string formed by two pointers (one on the beginning, other
132 on the char after the last char) to a new, malloc-ed location.
135 strdupdelim (const char *beg, const char *end)
137 char *res = (char *)xmalloc (end - beg + 1);
138 memcpy (res, beg, end - beg);
139 res[end - beg] = '\0';
143 /* Parse a string containing comma-separated elements, and return a
144 vector of char pointers with the elements. Spaces following the
145 commas are ignored. */
147 sepstring (const char *s)
161 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
162 res[i] = strdupdelim (p, s);
165 /* Skip the blanks following the ','. */
173 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
174 res[i] = strdupdelim (p, s);
179 /* Return pointer to a static char[] buffer in which zero-terminated
180 string-representation of TM (in form hh:mm:ss) is printed.
182 If TM is NULL, the current time will be used. */
185 time_str (time_t *tm)
187 static char output[15];
189 time_t secs = tm ? *tm : time (NULL);
193 /* In case of error, return the empty string. Maybe we should
194 just abort if this happens? */
198 ptm = localtime (&secs);
199 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
203 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
206 datetime_str (time_t *tm)
208 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
210 time_t secs = tm ? *tm : time (NULL);
214 /* In case of error, return the empty string. Maybe we should
215 just abort if this happens? */
219 ptm = localtime (&secs);
220 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
221 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
222 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
226 /* The Windows versions of the following two functions are defined in
231 fork_to_background (void)
234 /* Whether we arrange our own version of opt.lfilename here. */
239 opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
251 /* parent, no error */
252 printf (_("Continuing in background, pid %d.\n"), (int)pid);
254 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
255 exit (0); /* #### should we use _exit()? */
258 /* child: give up the privileges and keep running. */
260 freopen ("/dev/null", "r", stdin);
261 freopen ("/dev/null", "w", stdout);
262 freopen ("/dev/null", "w", stderr);
264 #endif /* not WINDOWS */
266 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
267 specified with TM. */
269 touch (const char *file, time_t tm)
271 #ifdef HAVE_STRUCT_UTIMBUF
272 struct utimbuf times;
273 times.actime = times.modtime = tm;
276 times[0] = times[1] = tm;
279 if (utime (file, ×) == -1)
280 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
283 /* Checks if FILE is a symbolic link, and removes it if it is. Does
284 nothing under MS-Windows. */
286 remove_link (const char *file)
291 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
293 DEBUGP (("Unlinking %s (symlink).\n", file));
296 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
297 file, strerror (errno));
302 /* Does FILENAME exist? This is quite a lousy implementation, since
303 it supplies no error codes -- only a yes-or-no answer. Thus it
304 will return that a file does not exist if, e.g., the directory is
305 unreadable. I don't mind it too much currently, though. The
306 proper way should, of course, be to have a third, error state,
307 other than true/false, but that would introduce uncalled-for
308 additional complexity to the callers. */
310 file_exists_p (const char *filename)
313 return access (filename, F_OK) >= 0;
316 return stat (filename, &buf) >= 0;
320 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
321 Returns 0 on error. */
323 file_non_directory_p (const char *path)
326 /* Use lstat() rather than stat() so that symbolic links pointing to
327 directories can be identified correctly. */
328 if (lstat (path, &buf) != 0)
330 return S_ISDIR (buf.st_mode) ? 0 : 1;
333 /* Return the size of file named by FILENAME, or -1 if it cannot be
334 opened or seeked into. */
336 file_size (const char *filename)
339 /* We use fseek rather than stat to determine the file size because
340 that way we can also verify whether the file is readable.
341 Inspired by the POST patch by Arnaud Wylie. */
342 FILE *fp = fopen (filename, "rb");
345 fseek (fp, 0, SEEK_END);
351 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
352 doesn't exist is found. Return a freshly allocated copy of the
356 unique_name_1 (const char *prefix)
359 int plen = strlen (prefix);
360 char *template = (char *)alloca (plen + 1 + 24);
361 char *template_tail = template + plen;
363 memcpy (template, prefix, plen);
364 *template_tail++ = '.';
367 number_to_string (template_tail, count++);
368 while (file_exists_p (template));
370 return xstrdup (template);
373 /* Return a unique file name, based on FILE.
375 More precisely, if FILE doesn't exist, it is returned unmodified.
376 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
377 file name that doesn't exist is returned.
379 The resulting file is not created, only verified that it didn't
380 exist at the point in time when the function was called.
381 Therefore, where security matters, don't rely that the file created
382 by this function exists until you open it with O_EXCL or
385 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
386 string. Otherwise, it may return FILE if the file doesn't exist
387 (and therefore doesn't need changing). */
390 unique_name (const char *file, int allow_passthrough)
392 /* If the FILE itself doesn't exist, return it without
394 if (!file_exists_p (file))
395 return allow_passthrough ? (char *)file : xstrdup (file);
397 /* Otherwise, find a numeric suffix that results in unused file name
399 return unique_name_1 (file);
402 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
403 are missing, create them first. In case any mkdir() call fails,
404 return its error status. Returns 0 on successful completion.
406 The behaviour of this function should be identical to the behaviour
407 of `mkdir -p' on systems where mkdir supports the `-p' option. */
409 make_directory (const char *directory)
416 /* Make a copy of dir, to be able to write to it. Otherwise, the
417 function is unsafe if called with a read-only char *argument. */
418 STRDUP_ALLOCA (dir, directory);
420 /* If the first character of dir is '/', skip it (and thus enable
421 creation of absolute-pathname directories). */
422 for (i = (*dir == '/'); 1; ++i)
424 for (; dir[i] && dir[i] != '/'; i++)
429 /* Check whether the directory already exists. Allow creation of
430 intermediate directories to fail, as the initial path components
431 are not necessarily directories! */
432 if (!file_exists_p (dir))
433 ret = mkdir (dir, 0777);
444 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
445 should be a file name.
447 file_merge("/foo/bar", "baz") => "/foo/baz"
448 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
449 file_merge("foo", "bar") => "bar"
451 In other words, it's a simpler and gentler version of uri_merge_1. */
454 file_merge (const char *base, const char *file)
457 const char *cut = (const char *)strrchr (base, '/');
460 return xstrdup (file);
462 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
463 memcpy (result, base, cut - base);
464 result[cut - base] = '/';
465 strcpy (result + (cut - base) + 1, file);
470 static int in_acclist PARAMS ((const char *const *, const char *, int));
472 /* Determine whether a file is acceptable to be followed, according to
473 lists of patterns to accept/reject. */
475 acceptable (const char *s)
479 while (l && s[l] != '/')
486 return (in_acclist ((const char *const *)opt.accepts, s, 1)
487 && !in_acclist ((const char *const *)opt.rejects, s, 1));
489 return in_acclist ((const char *const *)opt.accepts, s, 1);
491 else if (opt.rejects)
492 return !in_acclist ((const char *const *)opt.rejects, s, 1);
496 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
497 `/something', frontcmp() will return 1 only if S2 begins with
498 `/something'. Otherwise, 0 is returned. */
500 frontcmp (const char *s1, const char *s2)
502 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
506 /* Iterate through STRLIST, and return the first element that matches
507 S, through wildcards or front comparison (as appropriate). */
509 proclist (char **strlist, const char *s, enum accd flags)
513 for (x = strlist; *x; x++)
514 if (has_wildcards_p (*x))
516 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
521 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
528 /* Returns whether DIRECTORY is acceptable for download, wrt the
529 include/exclude lists.
531 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
532 and absolute paths may be freely intermixed. */
534 accdir (const char *directory, enum accd flags)
536 /* Remove starting '/'. */
537 if (flags & ALLABS && *directory == '/')
541 if (!proclist (opt.includes, directory, flags))
546 if (proclist (opt.excludes, directory, flags))
552 /* Return non-zero if STRING ends with TAIL. For instance:
554 match_tail ("abc", "bc", 0) -> 1
555 match_tail ("abc", "ab", 0) -> 0
556 match_tail ("abc", "abc", 0) -> 1
558 If FOLD_CASE_P is non-zero, the comparison will be
562 match_tail (const char *string, const char *tail, int fold_case_p)
566 /* We want this to be fast, so we code two loops, one with
567 case-folding, one without. */
571 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
572 if (string[i] != tail[j])
577 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
578 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
582 /* If the tail was exhausted, the match was successful. */
589 /* Checks whether string S matches each element of ACCEPTS. A list
590 element is matched either with fnmatch() or match_tail(),
591 according to whether the element contains wildcards or not.
593 If the BACKWARD is 0, don't do backward comparison -- just compare
596 in_acclist (const char *const *accepts, const char *s, int backward)
598 for (; *accepts; accepts++)
600 if (has_wildcards_p (*accepts))
602 /* fnmatch returns 0 if the pattern *does* match the
604 if (fnmatch (*accepts, s, 0) == 0)
611 if (match_tail (s, *accepts, 0))
616 if (!strcmp (s, *accepts))
624 /* Return the location of STR's suffix (file extension). Examples:
625 suffix ("foo.bar") -> "bar"
626 suffix ("foo.bar.baz") -> "baz"
627 suffix ("/foo/bar") -> NULL
628 suffix ("/foo.bar/baz") -> NULL */
630 suffix (const char *str)
634 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
638 return (char *)str + i;
643 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
647 has_wildcards_p (const char *s)
650 if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
655 /* Return non-zero if FNAME ends with a typical HTML suffix. The
656 following (case-insensitive) suffixes are presumed to be HTML files:
660 ?html (`?' matches one character)
662 #### CAVEAT. This is not necessarily a good indication that FNAME
663 refers to a file that contains HTML! */
665 has_html_suffix_p (const char *fname)
669 if ((suf = suffix (fname)) == NULL)
671 if (!strcasecmp (suf, "html"))
673 if (!strcasecmp (suf, "htm"))
675 if (suf[0] && !strcasecmp (suf + 1, "html"))
680 /* Read a line from FP and return the pointer to freshly allocated
681 storage. The storage space is obtained through malloc() and should
682 be freed with free() when it is no longer needed.
684 The length of the line is not limited, except by available memory.
685 The newline character at the end of line is retained. The line is
686 terminated with a zero character.
688 After end-of-file is encountered without anything being read, NULL
689 is returned. NULL is also returned on error. To distinguish
690 between these two cases, use the stdio function ferror(). */
693 read_whole_line (FILE *fp)
697 char *line = (char *)xmalloc (bufsize);
699 while (fgets (line + length, bufsize - length, fp))
701 length += strlen (line + length);
703 /* Possible for example when reading from a binary file where
704 a line begins with \0. */
707 if (line[length - 1] == '\n')
710 /* fgets() guarantees to read the whole line, or to use up the
711 space we've given it. We can double the buffer
714 line = xrealloc (line, bufsize);
716 if (length == 0 || ferror (fp))
721 if (length + 1 < bufsize)
722 /* Relieve the memory from our exponential greediness. We say
723 `length + 1' because the terminating \0 is not included in
724 LENGTH. We don't need to zero-terminate the string ourselves,
725 though, because fgets() does that. */
726 line = xrealloc (line, length + 1);
730 /* Read FILE into memory. A pointer to `struct file_memory' is
731 returned; use struct element `content' to access file contents, and
732 the element `length' to know the file length. `content' is *not*
733 zero-terminated, and you should *not* read or write beyond the [0,
734 length) range of characters.
736 After you are done with the file contents, call read_file_free to
739 Depending on the operating system and the type of file that is
740 being read, read_file() either mmap's the file into memory, or
741 reads the file into the core using read().
743 If file is named "-", fileno(stdin) is used for reading instead.
744 If you want to read from a real file named "-", use "./-" instead. */
747 read_file (const char *file)
750 struct file_memory *fm;
752 int inhibit_close = 0;
754 /* Some magic in the finest tradition of Perl and its kin: if FILE
755 is "-", just use stdin. */
760 /* Note that we don't inhibit mmap() in this case. If stdin is
761 redirected from a regular file, mmap() will still work. */
764 fd = open (file, O_RDONLY);
767 fm = xnew (struct file_memory);
772 if (fstat (fd, &buf) < 0)
774 fm->length = buf.st_size;
775 /* NOTE: As far as I know, the callers of this function never
776 modify the file text. Relying on this would enable us to
777 specify PROT_READ and MAP_SHARED for a marginal gain in
778 efficiency, but at some cost to generality. */
779 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
781 if (fm->content == (char *)MAP_FAILED)
791 /* The most common reason why mmap() fails is that FD does not point
792 to a plain file. However, it's also possible that mmap() doesn't
793 work for a particular type of file. Therefore, whenever mmap()
794 fails, we just fall back to the regular method. */
795 #endif /* HAVE_MMAP */
798 size = 512; /* number of bytes fm->contents can
799 hold at any given time. */
800 fm->content = xmalloc (size);
804 if (fm->length > size / 2)
806 /* #### I'm not sure whether the whole exponential-growth
807 thing makes sense with kernel read. On Linux at least,
808 read() refuses to read more than 4K from a file at a
809 single chunk anyway. But other Unixes might optimize it
810 better, and it doesn't *hurt* anything, so I'm leaving
813 /* Normally, we grow SIZE exponentially to make the number
814 of calls to read() and realloc() logarithmic in relation
815 to file size. However, read() can read an amount of data
816 smaller than requested, and it would be unreasonable to
817 double SIZE every time *something* was read. Therefore,
818 we double SIZE only when the length exceeds half of the
819 entire allocated size. */
821 fm->content = xrealloc (fm->content, size);
823 nread = read (fd, fm->content + fm->length, size - fm->length);
825 /* Successful read. */
836 if (size > fm->length && fm->length != 0)
837 /* Due to exponential growth of fm->content, the allocated region
838 might be much larger than what is actually needed. */
839 fm->content = xrealloc (fm->content, fm->length);
851 /* Release the resources held by FM. Specifically, this calls
852 munmap() or xfree() on fm->content, depending whether mmap or
853 malloc/read were used to read in the file. It also frees the
854 memory needed to hold the FM structure itself. */
857 read_file_free (struct file_memory *fm)
862 munmap (fm->content, fm->length);
872 /* Free the pointers in a NULL-terminated vector of pointers, then
873 free the pointer itself. */
875 free_vec (char **vec)
886 /* Append vector V2 to vector V1. The function frees V2 and
887 reallocates V1 (thus you may not use the contents of either
888 pointer after the call). If V1 is NULL, V2 is returned. */
890 merge_vecs (char **v1, char **v2)
900 /* To avoid j == 0 */
905 for (i = 0; v1[i]; i++);
907 for (j = 0; v2[j]; j++);
909 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
910 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
915 /* A set of simple-minded routines to store strings in a linked list.
916 This used to also be used for searching, but now we have hash
919 /* It's a shame that these simple things like linked lists and hash
920 tables (see hash.c) need to be implemented over and over again. It
921 would be nice to be able to use the routines from glib -- see
922 www.gtk.org for details. However, that would make Wget depend on
923 glib, and I want to avoid dependencies to external libraries for
924 reasons of convenience and portability (I suspect Wget is more
925 portable than anything ever written for Gnome). */
927 /* Append an element to the list. If the list has a huge number of
928 elements, this can get slow because it has to find the list's
929 ending. If you think you have to call slist_append in a loop,
930 think about calling slist_prepend() followed by slist_nreverse(). */
933 slist_append (slist *l, const char *s)
935 slist *newel = xnew (slist);
938 newel->string = xstrdup (s);
943 /* Find the last element. */
950 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
953 slist_prepend (slist *l, const char *s)
955 slist *newel = xnew (slist);
956 newel->string = xstrdup (s);
961 /* Destructively reverse L. */
964 slist_nreverse (slist *l)
969 slist *next = l->next;
977 /* Is there a specific entry in the list? */
979 slist_contains (slist *l, const char *s)
981 for (; l; l = l->next)
982 if (!strcmp (l->string, s))
987 /* Free the whole slist. */
989 slist_free (slist *l)
1000 /* Sometimes it's useful to create "sets" of strings, i.e. special
1001 hash tables where you want to store strings as keys and merely
1002 query for their existence. Here is a set of utility routines that
1003 makes that transparent. */
1006 string_set_add (struct hash_table *ht, const char *s)
1008 /* First check whether the set element already exists. If it does,
1009 do nothing so that we don't have to free() the old element and
1010 then strdup() a new one. */
1011 if (hash_table_contains (ht, s))
1014 /* We use "1" as value. It provides us a useful and clear arbitrary
1015 value, and it consumes no memory -- the pointers to the same
1016 string "1" will be shared by all the key-value pairs in all `set'
1018 hash_table_put (ht, xstrdup (s), "1");
1021 /* Synonym for hash_table_contains... */
1024 string_set_contains (struct hash_table *ht, const char *s)
1026 return hash_table_contains (ht, s);
1030 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1037 string_set_free (struct hash_table *ht)
1039 hash_table_map (ht, string_set_free_mapper, NULL);
1040 hash_table_destroy (ht);
1044 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1051 /* Another utility function: call free() on all keys and values of HT. */
1054 free_keys_and_values (struct hash_table *ht)
1056 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1060 /* Engine for legible and legible_large_int; add thousand separators
1061 to numbers printed in strings. */
1064 legible_1 (const char *repr)
1066 static char outbuf[48];
1071 /* Reset the pointers. */
1075 /* Ignore the sign for the purpose of adding thousand
1082 /* How many digits before the first separator? */
1083 mod = strlen (inptr) % 3;
1085 for (i = 0; i < mod; i++)
1086 *outptr++ = inptr[i];
1087 /* Now insert the rest of them, putting separator before every
1089 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1091 if (i % 3 == 0 && i1 != 0)
1093 *outptr++ = inptr[i1];
1095 /* Zero-terminate the string. */
1100 /* Legible -- return a static pointer to the legibly printed long. */
1106 /* Print the number into the buffer. */
1107 number_to_string (inbuf, l);
1108 return legible_1 (inbuf);
1111 /* Write a string representation of LARGE_INT NUMBER into the provided
1112 buffer. The buffer should be able to accept 24 characters,
1113 including the terminating zero.
1115 It would be dangerous to use sprintf, because the code wouldn't
1116 work on a machine with gcc-provided long long support, but without
1117 libc support for "%lld". However, such platforms will typically
1118 not have snprintf and will use our version, which does support
1119 "%lld" where long longs are available. */
1122 large_int_to_string (char *buffer, LARGE_INT number)
1124 snprintf (buffer, 24, LARGE_INT_FMT, number);
1127 /* The same as legible(), but works on LARGE_INT. */
1130 legible_large_int (LARGE_INT l)
1133 large_int_to_string (inbuf, l);
1134 return legible_1 (inbuf);
1137 /* Count the digits in a (long) integer. */
1139 numdigit (long number)
1147 while ((number /= 10) > 0)
1152 /* Attempt to calculate INT_MAX on machines that don't bother to
1158 # define INT_MAX ((int) ~((unsigned)1 << CHAR_BIT * sizeof (int) - 1))
1161 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1162 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1164 #define DIGITS_1(figure) ONE_DIGIT (figure)
1165 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1166 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1167 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1168 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1169 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1170 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1171 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1172 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1173 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1175 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1177 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1178 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1179 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1180 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1181 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1182 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1183 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1184 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1185 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1187 /* Print NUMBER to BUFFER in base 10. This should be completely
1188 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1190 The speedup may make a difference in programs that frequently
1191 convert numbers to strings. Some implementations of sprintf,
1192 particularly the one in GNU libc, have been known to be extremely
1193 slow compared to this function.
1195 Return the pointer to the location where the terminating zero was
1196 printed. (Equivalent to calling buffer+strlen(buffer) after the
1199 BUFFER should be big enough to accept as many bytes as you expect
1200 the number to take up. On machines with 64-bit longs the maximum
1201 needed size is 24 bytes. That includes the digits needed for the
1202 largest 64-bit number, the `-' sign in case it's negative, and the
1203 terminating '\0'. */
1206 number_to_string (char *buffer, long number)
1211 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1212 /* We are running in a strange or misconfigured environment. Let
1213 sprintf cope with it. */
1214 sprintf (buffer, "%ld", n);
1215 p += strlen (buffer);
1216 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1222 /* We cannot print a '-' and assign -n to n because -n would
1223 overflow. Let sprintf deal with this border case. */
1224 sprintf (buffer, "%ld", n);
1225 p += strlen (buffer);
1233 if (n < 10) { DIGITS_1 (1); }
1234 else if (n < 100) { DIGITS_2 (10); }
1235 else if (n < 1000) { DIGITS_3 (100); }
1236 else if (n < 10000) { DIGITS_4 (1000); }
1237 else if (n < 100000) { DIGITS_5 (10000); }
1238 else if (n < 1000000) { DIGITS_6 (100000); }
1239 else if (n < 10000000) { DIGITS_7 (1000000); }
1240 else if (n < 100000000) { DIGITS_8 (10000000); }
1241 else if (n < 1000000000) { DIGITS_9 (100000000); }
1242 #if SIZEOF_LONG == 4
1243 /* ``if (1)'' serves only to preserve editor indentation. */
1244 else if (1) { DIGITS_10 (1000000000); }
1245 #else /* SIZEOF_LONG != 4 */
1246 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1247 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1248 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1249 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1250 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1251 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1252 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1253 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1254 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1255 else { DIGITS_19 (1000000000000000000L); }
1256 #endif /* SIZEOF_LONG != 4 */
1259 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1265 #undef ONE_DIGIT_ADVANCE
1287 /* Support for timers. */
1289 #undef TIMER_WINDOWS
1290 #undef TIMER_GETTIMEOFDAY
1293 /* Depending on the OS and availability of gettimeofday(), one and
1294 only one of the above constants will be defined. Virtually all
1295 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1296 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1297 non-Windows systems without gettimeofday.
1299 #### Perhaps we should also support ftime(), which exists on old
1300 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1301 C, if memory serves me.) */
1304 # define TIMER_WINDOWS
1305 #else /* not WINDOWS */
1306 # ifdef HAVE_GETTIMEOFDAY
1307 # define TIMER_GETTIMEOFDAY
1311 #endif /* not WINDOWS */
1313 #ifdef TIMER_GETTIMEOFDAY
1314 typedef struct timeval wget_sys_time;
1318 typedef time_t wget_sys_time;
1321 #ifdef TIMER_WINDOWS
1322 typedef ULARGE_INTEGER wget_sys_time;
1326 /* Whether the start time has been initialized. */
1329 /* The starting point in time which, subtracted from the current
1330 time, yields elapsed time. */
1331 wget_sys_time start;
1333 /* The most recent elapsed time, calculated by wtimer_elapsed().
1334 Measured in milliseconds. */
1335 double elapsed_last;
1337 /* Approximately, the time elapsed between the true start of the
1338 measurement and the time represented by START. */
1339 double elapsed_pre_start;
1342 /* Allocate a timer. Calling wtimer_read on the timer will return
1343 zero. It is not legal to call wtimer_update with a freshly
1344 allocated timer -- use wtimer_reset first. */
1347 wtimer_allocate (void)
1349 struct wget_timer *wt = xnew (struct wget_timer);
1354 /* Allocate a new timer and reset it. Return the new timer. */
1359 struct wget_timer *wt = wtimer_allocate ();
1364 /* Free the resources associated with the timer. Its further use is
1368 wtimer_delete (struct wget_timer *wt)
1373 /* Store system time to WST. */
1376 wtimer_sys_set (wget_sys_time *wst)
1378 #ifdef TIMER_GETTIMEOFDAY
1379 gettimeofday (wst, NULL);
1386 #ifdef TIMER_WINDOWS
1387 /* We use GetSystemTime to get the elapsed time. MSDN warns that
1388 system clock adjustments can skew the output of GetSystemTime
1389 when used as a timer and gives preference to GetTickCount and
1390 high-resolution timers. But GetTickCount can overflow, and hires
1391 timers are typically used for profiling, not for regular time
1392 measurement. Since we handle clock skew anyway, we just use
1396 GetSystemTime (&st);
1398 /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1399 FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1400 arithmetic on that. */
1401 SystemTimeToFileTime (&st, &ft);
1402 wst->HighPart = ft.dwHighDateTime;
1403 wst->LowPart = ft.dwLowDateTime;
1407 /* Reset timer WT. This establishes the starting point from which
1408 wtimer_elapsed() will return the number of elapsed milliseconds.
1409 It is allowed to reset a previously used timer.
1411 If a non-zero value is used as START, the timer's values will be
1415 wtimer_reset (struct wget_timer *wt)
1417 /* Set the start time to the current time. */
1418 wtimer_sys_set (&wt->start);
1419 wt->elapsed_last = 0;
1420 wt->elapsed_pre_start = 0;
1421 wt->initialized = 1;
1425 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1427 #ifdef TIMER_GETTIMEOFDAY
1428 return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1429 + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1433 return 1000 * (*wst1 - *wst2);
1437 /* VC++ 6 doesn't support direct cast of uint64 to double. To work
1438 around this, we subtract, then convert to signed, then finally to
1440 return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1444 /* Update the timer's elapsed interval. This function causes the
1445 timer to call gettimeofday (or time(), etc.) to update its idea of
1446 current time. To get the elapsed interval in milliseconds, use
1449 This function handles clock skew, i.e. time that moves backwards is
1453 wtimer_update (struct wget_timer *wt)
/* wtimer_reset sets this flag; the assertion catches timers that
   are used before it has been set.  */
1458 assert (wt->initialized != 0);
/* Read the clock, then add the time since wt->start to whatever was
   carried over in elapsed_pre_start.  */
1460 wtimer_sys_set (&now);
1461 elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1463 /* Ideally we'd just return the difference between NOW and
1464 wt->start. However, the system timer can be set back, and we
1465 could return a value smaller than when we were last called, even
1466 a negative value. Both of these would confuse the callers, which
1467 expect us to return monotonically nondecreasing values.
1469 Therefore: if ELAPSED is smaller than its previous known value,
1470 we reset wt->start to the current time and effectively start
1471 measuring from this point. But since we don't want the elapsed
1472 value to start from zero, we set elapsed_pre_start to the last
1473 elapsed time and increment all future calculations by that
1476 if (elapsed < wt->elapsed_last)
1479 wt->elapsed_pre_start = wt->elapsed_last;
1480 elapsed = wt->elapsed_last;
1483 wt->elapsed_last = elapsed;
1486 /* Return the elapsed time in milliseconds between the last call to
1487 wtimer_reset and the last call to wtimer_update.
1489 A typical use of the timer interface would be:
1491 struct wget_timer *timer = wtimer_new ();
1492 ... do something that takes a while ...
1494 double msecs = wtimer_read (timer); */
1497 wtimer_read (const struct wget_timer *wt)
/* Report the value stored by the most recent wtimer_update (or the
   zero stored by wtimer_reset); the clock itself is not consulted
   here.  */
1499 return wt->elapsed_last;
1502 /* Return the assessed granularity of the timer implementation, in
1503 milliseconds. This is used by code that tries to substitute a
1504 better value for timers that have returned zero. */
1507 wtimer_granularity (void)
1509 #ifdef TIMER_GETTIMEOFDAY
1510 /* Granularity of gettimeofday varies wildly between architectures.
1511 However, it appears that on modern machines it tends to be better
1512 than 1ms. Assume 100 usecs. (Perhaps the configure process
1513 could actually measure this?) */
1521 #ifdef TIMER_WINDOWS
1522 /* According to MSDN, GetSystemTime returns a broken-down time
1523 structure the smallest member of which is milliseconds. */
1528 /* This should probably be at a better place, but it doesn't really
1529 fit into html-parse.c. */
1531 /* The function returns the pointer to the malloc-ed quoted version of
1532 string s. It will recognize and quote numeric and special graphic
1533 entities, as per RFC1866:
1541 No other entities are recognized or replaced. */
1543 html_quote_string (const char *s)
1549 /* Pass through the string, and count the new size. */
1550 for (i = 0; *s; s++, i++)
/* Each increment below is the length of the entity text that follows
   the leading `&'; the `&' itself is covered by the loop's i++.  */
1553 i += 4; /* `amp;' */
1554 else if (*s == '<' || *s == '>')
1555 i += 3; /* `lt;' and `gt;' */
1556 else if (*s == '\"')
1557 i += 5; /* `quot;' */
/* I is now the length of the quoted text; the extra byte leaves room
   for a terminator.  */
1561 res = (char *)xmalloc (i + 1);
/* Second pass: write the quoted text through P, starting at RES.  */
1563 for (p = res; *s; s++)
/* `<' and `>' differ only in the first letter of the entity name.  */
1576 *p++ = (*s == '<' ? 'l' : 'g');
1603 /* Determine the width of the terminal we're running on. If that's
1604 not possible, return 0. */
1607 determine_screen_width (void)
1609 /* If there's a way to get the terminal size using POSIX
1610 tcgetattr(), somebody please tell me. */
1613 #else /* TIOCGWINSZ */
1617 if (opt.lfilename != NULL)
/* Ask the terminal driver behind stderr for its window size.  */
1620 fd = fileno (stderr);
1621 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1622 return 0; /* most likely ENOTTY */
1625 #endif /* TIOCGWINSZ */
1628 /* Return a random number between 0 and MAX-1, inclusive.
1630 If MAX is greater than the value of RAND_MAX+1 on the system, the
1631 returned value will be in the range [0, RAND_MAX]. This may be
1632 fixed in a future release.
1634 The random number generator is seeded automatically the first time
1637 This uses rand() for portability. It has been suggested that
1638 random() offers better randomness, but this is not required for
1639 Wget, so I chose to go for simplicity and use rand
1642 DO NOT use this for cryptographic purposes. It is only meant to be
1643 used in situations where quality of the random numbers returned
1644 doesn't really matter. */
1647 random_number (int max)
/* The seed is the current calendar time as returned by time().  */
1655 srand (time (NULL));
1660 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1661 and enforce that assumption by masking other bits. */
1663 # define RAND_MAX 32767
1667 /* This is equivalent to rand() % max, but uses the high-order bits
1668 for better randomness on architectures where rand() is implemented
1669 using a simple congruential generator. */
/* RAND_MAX + 1.0 is evaluated in double, so the divisor cannot
   overflow even where RAND_MAX equals INT_MAX.  With RND no larger
   than RAND_MAX the quotient stays below MAX, and the cast in the
   return truncates it to an int.  */
1671 bounded = (double)max * rnd / (RAND_MAX + 1.0);
1672 return (int)bounded;
1675 /* Return a random uniformly distributed floating point number in the
1676 [0, 1) range. The precision of returned numbers is 9 digits.
1678 Modify this to use erand48() where available! */
1683 /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1684 sure it's greater than 1000. */
1685 int rnd1 = random_number (1000);
1686 int rnd2 = random_number (1000);
1687 int rnd3 = random_number (1000);
1688 return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1692 /* A debugging function for checking whether an MD5 library works. */
1694 #include "gen-md5.h"
1697 debug_test_md5 (char *buf)
/* Binary digest, filled in by gen_md5_finish below.  */
1699 unsigned char raw[16];
/* Sized for two hex digits per digest byte plus a terminator.  The
   buffer is static, so its contents persist between calls.  */
1700 static char res[33];
1704 ALLOCA_MD5_CONTEXT (ctx);
/* BUF is hashed as a NUL-terminated string: its length comes from
   strlen.  */
1707 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1708 gen_md5_finish (ctx, raw);
/* High nibble first, then low nibble.  */
1715 *p2++ = XNUM_TO_digit (*p1 >> 4);
1716 *p2++ = XNUM_TO_digit (*p1 & 0xf);
1725 /* Implementation of run_with_timeout, a generic timeout-forcing
1726 routine for systems with Unix-like signal handling. */
1728 #ifdef USE_SIGNAL_TIMEOUT
1729 # ifdef HAVE_SIGSETJMP
/* A non-zero second argument makes sigsetjmp save the signal mask,
   which siglongjmp restores when the handler jumps out.  */
1730 # define SETJMP(env) sigsetjmp (env, 1)
1732 static sigjmp_buf run_with_timeout_env;
1735 abort_run_with_timeout (int sig)
1737 assert (sig == SIGALRM);
/* run_with_timeout sees this -1 as a non-zero return from SETJMP and
   treats it as a timeout.  */
1738 siglongjmp (run_with_timeout_env, -1);
1740 # else /* not HAVE_SIGSETJMP */
1741 # define SETJMP(env) setjmp (env)
1743 static jmp_buf run_with_timeout_env;
1746 abort_run_with_timeout (int sig)
1748 assert (sig == SIGALRM);
1749 /* We don't have siglongjmp to preserve the set of blocked signals;
1750 if we longjumped out of the handler at this point, SIGALRM would
1751 remain blocked. We must unblock it manually. */
1752 int mask = siggetmask ();
1753 mask &= ~sigmask (SIGALRM);
1756 /* Now it's safe to longjump. */
1757 longjmp (run_with_timeout_env, -1);
1759 # endif /* not HAVE_SIGSETJMP */
1761 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses
1762 setitimer where available, alarm otherwise.
1764 TIMEOUT should be non-zero. If the timeout value is so small that
1765 it would be rounded to zero, it is rounded to the least legal value
1766 instead (1us for setitimer, 1s for alarm). That ensures that
1767 SIGALRM will be delivered in all cases. */
1770 alarm_set (double timeout)
1773 /* Use the modern itimer interface. */
1774 struct itimerval itv;
/* Whole seconds go into tv_sec; the fractional remainder is
   converted to microseconds for tv_usec.  */
1776 itv.it_value.tv_sec = (long) timeout;
1777 itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout);
1778 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1779 /* Ensure that we wait for at least the minimum interval.
1780 Specifying zero would mean "wait forever". */
1781 itv.it_value.tv_usec = 1;
/* Only it_value is assigned in the statements above.
   NOTE(review): confirm that it_interval is zeroed so the timer
   fires once rather than repeating.  */
1782 setitimer (ITIMER_REAL, &itv, NULL);
1783 #else /* not ITIMER_REAL */
1784 /* Use the old alarm() interface. */
/* The cast truncates toward zero, so any TIMEOUT below one second
   yields 0 here.  */
1785 int secs = (int) timeout;
1787 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is
1788 because alarm(0) means "never deliver the alarm", i.e. "wait
1789 forever", which is not what someone who specifies a 0.5s
1790 timeout would expect. */
1793 #endif /* not ITIMER_REAL */
1796 /* Cancel the alarm set with alarm_set. */
1802 struct itimerval disable;
1804 setitimer (ITIMER_REAL, &disable, NULL);
1805 #else /* not ITIMER_REAL */
1807 #endif /* not ITIMER_REAL */
1810 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1811 seconds. Returns non-zero if the function was interrupted with a
1812 timeout, zero otherwise.
1814 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1815 using setitimer() or alarm(). The timeout is enforced by
1816 longjumping out of the SIGALRM handler. This has several
1817 advantages compared to the traditional approach of relying on
1818 signals causing system calls to exit with EINTR:
1820 * The callback function is *forcibly* interrupted after the
1821 timeout expires, (almost) regardless of what it was doing and
1822 whether it was in a syscall. For example, a calculation that
1823 takes a long time is interrupted as reliably as an IO
1826 * It works with both SYSV and BSD signals because it doesn't
1827 depend on the default setting of SA_RESTART.
1829 * It doesn't require special handler setup beyond a simple call to
1830 signal(). (It does use sigsetjmp/siglongjmp, but they're
1833 The only downside is that, if FUN allocates internal resources that
1834 are normally freed prior to exit from the function, they will be
1835 lost in case of timeout. */
1838 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
/* The handler is installed before the alarm is armed further down,
   so a SIGALRM from that alarm always finds it in place.  */
1848 signal (SIGALRM, abort_run_with_timeout);
/* SETJMP yields 0 when called directly; a non-zero value means
   abort_run_with_timeout jumped back here from the signal handler.  */
1849 if (SETJMP (run_with_timeout_env) != 0)
1851 /* Longjumped out of FUN with a timeout. */
1852 signal (SIGALRM, SIG_DFL);
1855 alarm_set (timeout);
1858 /* Preserve errno in case alarm() or signal() modifies it. */
1859 saved_errno = errno;
1861 signal (SIGALRM, SIG_DFL);
1862 errno = saved_errno;
1867 #else /* not USE_SIGNAL_TIMEOUT */
1870 /* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1871 define it under Windows, because Windows has its own version of
1872 run_with_timeout that uses threads. */
1875 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1880 #endif /* not WINDOWS */
1881 #endif /* not USE_SIGNAL_TIMEOUT */
1885 /* Sleep the specified number of seconds. On machines without
1886 nanosleep(), this may sleep shorter if interrupted by signals. */
1889 xsleep (double seconds)
1891 #ifdef HAVE_NANOSLEEP
1892 /* nanosleep is the preferred interface because it offers high
1893 accuracy and, more importantly, because it allows us to reliably
1894 restart after having been interrupted by a signal such as
1896 struct timespec sleep, remaining;
1897 sleep.tv_sec = (long) seconds;
1898 sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);
1899 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1900 /* If nanosleep has been interrupted by a signal, adjust the
1901 sleeping period and return to sleep. */
1903 #else /* not HAVE_NANOSLEEP */
1905 /* If usleep is available, use it in preference to select. */
1908 /* usleep apparently accepts unsigned long, which means it can't
1909 sleep longer than ~70 min (35min if signed). If the period
1910 is larger than what usleep can safely handle, use sleep
1911 first, then add usleep for subsecond accuracy. */
/* Keep only the fractional part of SECONDS for the usleep call.  */
1913 seconds -= (long) seconds;
1915 usleep (seconds * 1000000L);
1916 #else /* not HAVE_USLEEP */
1918 struct timeval sleep;
/* Whole seconds plus the remaining microseconds, as struct timeval
   expects.  */
1919 sleep.tv_sec = (long) seconds;
1920 sleep.tv_usec = 1000000L * (seconds - (long) seconds);
/* No descriptor sets are passed, so select acts purely as a timed
   wait.  */
1921 select (0, NULL, NULL, NULL, &sleep);
1922 /* If select returns -1 and errno is EINTR, it means we were
1923 interrupted by a signal. But without knowing how long we've
1924 actually slept, we can't return to sleep. Using gettimeofday to
1925 track sleeps is slow and unreliable due to clock skew. */
1926 #else /* not HAVE_SELECT */
1928 #endif /* not HAVE_SELECT */
1929 #endif /* not HAVE_USLEEP */
1930 #endif /* not HAVE_NANOSLEEP */
1933 #endif /* not WINDOWS */