1 /* Various utility functions.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
36 #else /* not HAVE_STRING_H */
38 #endif /* not HAVE_STRING_H */
39 #include <sys/types.h>
44 # include <sys/mman.h>
53 #ifdef HAVE_SYS_UTIME_H
54 # include <sys/utime.h>
58 # include <libc.h> /* for access() */
63 /* For TIOCGWINSZ and friends: */
64 #ifdef HAVE_SYS_IOCTL_H
65 # include <sys/ioctl.h>
71 /* Needed for run_with_timeout. */
72 #undef USE_SIGNAL_TIMEOUT
80 #ifndef HAVE_SIGSETJMP
81 /* If sigsetjmp is a macro, configure won't pick it up. */
83 # define HAVE_SIGSETJMP
88 # ifdef HAVE_SIGSETJMP
89 # define USE_SIGNAL_TIMEOUT
92 # define USE_SIGNAL_TIMEOUT
104 /* Utility function: like xstrdup(), but also lowercases S. */
107 xstrdup_lower (const char *s)
109 char *copy = xstrdup (s);
116 /* Return a count of how many times CHR occurs in STRING. */
119 count_char (const char *string, char chr)
123 for (p = string; *p; p++)
129 /* Copy the string formed by two pointers (one on the beginning, other
130 on the char after the last char) to a new, malloc-ed location.
133 strdupdelim (const char *beg, const char *end)
135 char *res = (char *)xmalloc (end - beg + 1);
136 memcpy (res, beg, end - beg);
137 res[end - beg] = '\0';
141 /* Parse a string containing comma-separated elements, and return a
142 vector of char pointers with the elements. Spaces following the
143 commas are ignored. */
145 sepstring (const char *s)
159 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
160 res[i] = strdupdelim (p, s);
163 /* Skip the blanks following the ','. */
171 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
172 res[i] = strdupdelim (p, s);
177 /* Return pointer to a static char[] buffer in which zero-terminated
178 string-representation of TM (in form hh:mm:ss) is printed.
180 If TM is NULL, the current time will be used. */
183 time_str (time_t *tm)
185 static char output[15];
187 time_t secs = tm ? *tm : time (NULL);
191 /* In case of error, return the empty string. Maybe we should
192 just abort if this happens? */
196 ptm = localtime (&secs);
197 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
201 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
204 datetime_str (time_t *tm)
206 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
208 time_t secs = tm ? *tm : time (NULL);
212 /* In case of error, return the empty string. Maybe we should
213 just abort if this happens? */
217 ptm = localtime (&secs);
218 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
219 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
220 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
224 /* The Windows versions of the following two functions are defined in
229 fork_to_background (void)
232 /* Whether we arrange our own version of opt.lfilename here. */
237 opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
249 /* parent, no error */
250 printf (_("Continuing in background, pid %d.\n"), (int)pid);
252 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
253 exit (0); /* #### should we use _exit()? */
256 /* child: give up the privileges and keep running. */
258 freopen ("/dev/null", "r", stdin);
259 freopen ("/dev/null", "w", stdout);
260 freopen ("/dev/null", "w", stderr);
262 #endif /* not WINDOWS */
264 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
265 specified with TM. */
267 touch (const char *file, time_t tm)
269 #ifdef HAVE_STRUCT_UTIMBUF
270 struct utimbuf times;
271 times.actime = times.modtime = tm;
274 times[0] = times[1] = tm;
277 if (utime (file, &times) == -1)
278 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
281 /* Checks if FILE is a symbolic link, and removes it if it is. Does
282 nothing under MS-Windows. */
284 remove_link (const char *file)
289 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
291 DEBUGP (("Unlinking %s (symlink).\n", file));
294 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
295 file, strerror (errno));
300 /* Does FILENAME exist? This is quite a lousy implementation, since
301 it supplies no error codes -- only a yes-or-no answer. Thus it
302 will return that a file does not exist if, e.g., the directory is
303 unreadable. I don't mind it too much currently, though. The
304 proper way should, of course, be to have a third, error state,
305 other than true/false, but that would introduce uncalled-for
306 additional complexity to the callers. */
308 file_exists_p (const char *filename)
311 return access (filename, F_OK) >= 0;
314 return stat (filename, &buf) >= 0;
318 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
319 Returns 0 on error. */
321 file_non_directory_p (const char *path)
324 /* Use lstat() rather than stat() so that symbolic links pointing to
325 directories can be identified correctly. */
326 if (lstat (path, &buf) != 0)
328 return S_ISDIR (buf.st_mode) ? 0 : 1;
331 /* Return the size of file named by FILENAME, or -1 if it cannot be
332 opened or seeked into. */
334 file_size (const char *filename)
337 /* We use fseek rather than stat to determine the file size because
338 that way we can also verify whether the file is readable.
339 Inspired by the POST patch by Arnaud Wylie. */
340 FILE *fp = fopen (filename, "rb");
343 fseek (fp, 0, SEEK_END);
349 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
350 doesn't exist is found. Return a freshly allocated copy of the
354 unique_name_1 (const char *prefix)
357 int plen = strlen (prefix);
358 char *template = (char *)alloca (plen + 1 + 24);
359 char *template_tail = template + plen;
361 memcpy (template, prefix, plen);
362 *template_tail++ = '.';
365 number_to_string (template_tail, count++);
366 while (file_exists_p (template));
368 return xstrdup (template);
371 /* Return a unique file name, based on FILE.
373 More precisely, if FILE doesn't exist, it is returned unmodified.
374 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
375 file name that doesn't exist is returned.
377 The resulting file is not created, only verified that it didn't
378 exist at the point in time when the function was called.
379 Therefore, where security matters, don't rely that the file created
380 by this function exists until you open it with O_EXCL or
383 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
384 string. Otherwise, it may return FILE if the file doesn't exist
385 (and therefore doesn't need changing). */
388 unique_name (const char *file, int allow_passthrough)
390 /* If the FILE itself doesn't exist, return it without
392 if (!file_exists_p (file))
393 return allow_passthrough ? (char *)file : xstrdup (file);
395 /* Otherwise, find a numeric suffix that results in unused file name
397 return unique_name_1 (file);
400 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
401 are missing, create them first. In case any mkdir() call fails,
402 return its error status. Returns 0 on successful completion.
404 The behaviour of this function should be identical to the behaviour
405 of `mkdir -p' on systems where mkdir supports the `-p' option. */
407 make_directory (const char *directory)
414 /* Make a copy of dir, to be able to write to it. Otherwise, the
415 function is unsafe if called with a read-only char *argument. */
416 STRDUP_ALLOCA (dir, directory);
418 /* If the first character of dir is '/', skip it (and thus enable
419 creation of absolute-pathname directories). */
420 for (i = (*dir == '/'); 1; ++i)
422 for (; dir[i] && dir[i] != '/'; i++)
427 /* Check whether the directory already exists. Allow creation of
428 intermediate directories to fail, as the initial path components
429 are not necessarily directories! */
430 if (!file_exists_p (dir))
431 ret = mkdir (dir, 0777);
442 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
443 should be a file name.
445 file_merge("/foo/bar", "baz") => "/foo/baz"
446 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
447 file_merge("foo", "bar") => "bar"
449 In other words, it's a simpler and gentler version of uri_merge_1. */
452 file_merge (const char *base, const char *file)
455 const char *cut = (const char *)strrchr (base, '/');
458 return xstrdup (file);
460 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
461 memcpy (result, base, cut - base);
462 result[cut - base] = '/';
463 strcpy (result + (cut - base) + 1, file);
468 static int in_acclist PARAMS ((const char *const *, const char *, int));
470 /* Determine whether a file is acceptable to be followed, according to
471 lists of patterns to accept/reject. */
473 acceptable (const char *s)
477 while (l && s[l] != '/')
484 return (in_acclist ((const char *const *)opt.accepts, s, 1)
485 && !in_acclist ((const char *const *)opt.rejects, s, 1));
487 return in_acclist ((const char *const *)opt.accepts, s, 1);
489 else if (opt.rejects)
490 return !in_acclist ((const char *const *)opt.rejects, s, 1);
494 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
495 `/something', frontcmp() will return 1 only if S2 begins with
496 `/something'. Otherwise, 0 is returned. */
498 frontcmp (const char *s1, const char *s2)
500 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
504 /* Iterate through STRLIST, and return the first element that matches
505 S, through wildcards or front comparison (as appropriate). */
507 proclist (char **strlist, const char *s, enum accd flags)
511 for (x = strlist; *x; x++)
512 if (has_wildcards_p (*x))
514 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
519 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
526 /* Returns whether DIRECTORY is acceptable for download, wrt the
527 include/exclude lists.
529 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
530 and absolute paths may be freely intermixed. */
532 accdir (const char *directory, enum accd flags)
534 /* Remove starting '/'. */
535 if (flags & ALLABS && *directory == '/')
539 if (!proclist (opt.includes, directory, flags))
544 if (proclist (opt.excludes, directory, flags))
550 /* Return non-zero if STRING ends with TAIL. For instance:
552 match_tail ("abc", "bc", 0) -> 1
553 match_tail ("abc", "ab", 0) -> 0
554 match_tail ("abc", "abc", 0) -> 1
556 If FOLD_CASE_P is non-zero, the comparison will be
560 match_tail (const char *string, const char *tail, int fold_case_p)
564 /* We want this to be fast, so we code two loops, one with
565 case-folding, one without. */
569 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
570 if (string[i] != tail[j])
575 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
576 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
580 /* If the tail was exhausted, the match was successful. */
587 /* Checks whether string S matches each element of ACCEPTS. A list
588 element is matched either with fnmatch() or match_tail(),
589 according to whether the element contains wildcards or not.
591 If the BACKWARD is 0, don't do backward comparison -- just compare
594 in_acclist (const char *const *accepts, const char *s, int backward)
596 for (; *accepts; accepts++)
598 if (has_wildcards_p (*accepts))
600 /* fnmatch returns 0 if the pattern *does* match the
602 if (fnmatch (*accepts, s, 0) == 0)
609 if (match_tail (s, *accepts, 0))
614 if (!strcmp (s, *accepts))
622 /* Return the location of STR's suffix (file extension). Examples:
623 suffix ("foo.bar") -> "bar"
624 suffix ("foo.bar.baz") -> "baz"
625 suffix ("/foo/bar") -> NULL
626 suffix ("/foo.bar/baz") -> NULL */
628 suffix (const char *str)
632 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
636 return (char *)str + i;
641 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
645 has_wildcards_p (const char *s)
648 if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
653 /* Return non-zero if FNAME ends with a typical HTML suffix. The
654 following (case-insensitive) suffixes are presumed to be HTML files:
658 ?html (`?' matches one character)
660 #### CAVEAT. This is not necessarily a good indication that FNAME
661 refers to a file that contains HTML! */
663 has_html_suffix_p (const char *fname)
667 if ((suf = suffix (fname)) == NULL)
669 if (!strcasecmp (suf, "html"))
671 if (!strcasecmp (suf, "htm"))
673 if (suf[0] && !strcasecmp (suf + 1, "html"))
678 /* Read a line from FP and return the pointer to freshly allocated
679 storage. The storage space is obtained through malloc() and should
680 be freed with free() when it is no longer needed.
682 The length of the line is not limited, except by available memory.
683 The newline character at the end of line is retained. The line is
684 terminated with a zero character.
686 After end-of-file is encountered without anything being read, NULL
687 is returned. NULL is also returned on error. To distinguish
688 between these two cases, use the stdio function ferror(). */
691 read_whole_line (FILE *fp)
695 char *line = (char *)xmalloc (bufsize);
697 while (fgets (line + length, bufsize - length, fp))
699 length += strlen (line + length);
701 /* Possible for example when reading from a binary file where
702 a line begins with \0. */
705 if (line[length - 1] == '\n')
708 /* fgets() guarantees to read the whole line, or to use up the
709 space we've given it. We can double the buffer
712 line = xrealloc (line, bufsize);
714 if (length == 0 || ferror (fp))
719 if (length + 1 < bufsize)
720 /* Relieve the memory from our exponential greediness. We say
721 `length + 1' because the terminating \0 is not included in
722 LENGTH. We don't need to zero-terminate the string ourselves,
723 though, because fgets() does that. */
724 line = xrealloc (line, length + 1);
728 /* Read FILE into memory. A pointer to `struct file_memory' is
729 returned; use struct element `content' to access file contents, and
730 the element `length' to know the file length. `content' is *not*
731 zero-terminated, and you should *not* read or write beyond the [0,
732 length) range of characters.
734 After you are done with the file contents, call read_file_free to
737 Depending on the operating system and the type of file that is
738 being read, read_file() either mmap's the file into memory, or
739 reads the file into the core using read().
741 If file is named "-", fileno(stdin) is used for reading instead.
742 If you want to read from a real file named "-", use "./-" instead. */
745 read_file (const char *file)
748 struct file_memory *fm;
750 int inhibit_close = 0;
752 /* Some magic in the finest tradition of Perl and its kin: if FILE
753 is "-", just use stdin. */
758 /* Note that we don't inhibit mmap() in this case. If stdin is
759 redirected from a regular file, mmap() will still work. */
762 fd = open (file, O_RDONLY);
765 fm = xnew (struct file_memory);
770 if (fstat (fd, &buf) < 0)
772 fm->length = buf.st_size;
773 /* NOTE: As far as I know, the callers of this function never
774 modify the file text. Relying on this would enable us to
775 specify PROT_READ and MAP_SHARED for a marginal gain in
776 efficiency, but at some cost to generality. */
777 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
779 if (fm->content == (char *)MAP_FAILED)
789 /* The most common reason why mmap() fails is that FD does not point
790 to a plain file. However, it's also possible that mmap() doesn't
791 work for a particular type of file. Therefore, whenever mmap()
792 fails, we just fall back to the regular method. */
793 #endif /* HAVE_MMAP */
796 size = 512; /* number of bytes fm->contents can
797 hold at any given time. */
798 fm->content = xmalloc (size);
802 if (fm->length > size / 2)
804 /* #### I'm not sure whether the whole exponential-growth
805 thing makes sense with kernel read. On Linux at least,
806 read() refuses to read more than 4K from a file at a
807 single chunk anyway. But other Unixes might optimize it
808 better, and it doesn't *hurt* anything, so I'm leaving
811 /* Normally, we grow SIZE exponentially to make the number
812 of calls to read() and realloc() logarithmic in relation
813 to file size. However, read() can read an amount of data
814 smaller than requested, and it would be unreasonable to
815 double SIZE every time *something* was read. Therefore,
816 we double SIZE only when the length exceeds half of the
817 entire allocated size. */
819 fm->content = xrealloc (fm->content, size);
821 nread = read (fd, fm->content + fm->length, size - fm->length);
823 /* Successful read. */
834 if (size > fm->length && fm->length != 0)
835 /* Due to exponential growth of fm->content, the allocated region
836 might be much larger than what is actually needed. */
837 fm->content = xrealloc (fm->content, fm->length);
849 /* Release the resources held by FM. Specifically, this calls
850 munmap() or xfree() on fm->content, depending whether mmap or
851 malloc/read were used to read in the file. It also frees the
852 memory needed to hold the FM structure itself. */
855 read_file_free (struct file_memory *fm)
860 munmap (fm->content, fm->length);
870 /* Free the pointers in a NULL-terminated vector of pointers, then
871 free the pointer itself. */
873 free_vec (char **vec)
884 /* Append vector V2 to vector V1. The function frees V2 and
885 reallocates V1 (thus you may not use the contents of either
886 pointer after the call). If V1 is NULL, V2 is returned. */
888 merge_vecs (char **v1, char **v2)
898 /* To avoid j == 0 */
903 for (i = 0; v1[i]; i++);
905 for (j = 0; v2[j]; j++);
907 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
908 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
913 /* A set of simple-minded routines to store strings in a linked list.
914 This used to also be used for searching, but now we have hash
917 /* It's a shame that these simple things like linked lists and hash
918 tables (see hash.c) need to be implemented over and over again. It
919 would be nice to be able to use the routines from glib -- see
920 www.gtk.org for details. However, that would make Wget depend on
921 glib, and I want to avoid dependencies to external libraries for
922 reasons of convenience and portability (I suspect Wget is more
923 portable than anything ever written for Gnome). */
925 /* Append an element to the list. If the list has a huge number of
926 elements, this can get slow because it has to find the list's
927 ending. If you think you have to call slist_append in a loop,
928 think about calling slist_prepend() followed by slist_nreverse(). */
931 slist_append (slist *l, const char *s)
933 slist *newel = xnew (slist);
936 newel->string = xstrdup (s);
941 /* Find the last element. */
948 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
951 slist_prepend (slist *l, const char *s)
953 slist *newel = xnew (slist);
954 newel->string = xstrdup (s);
959 /* Destructively reverse L. */
962 slist_nreverse (slist *l)
967 slist *next = l->next;
975 /* Is there a specific entry in the list? */
977 slist_contains (slist *l, const char *s)
979 for (; l; l = l->next)
980 if (!strcmp (l->string, s))
985 /* Free the whole slist. */
987 slist_free (slist *l)
998 /* Sometimes it's useful to create "sets" of strings, i.e. special
999 hash tables where you want to store strings as keys and merely
1000 query for their existence. Here is a set of utility routines that
1001 makes that transparent. */
1004 string_set_add (struct hash_table *ht, const char *s)
1006 /* First check whether the set element already exists. If it does,
1007 do nothing so that we don't have to free() the old element and
1008 then strdup() a new one. */
1009 if (hash_table_contains (ht, s))
1012 /* We use "1" as value. It provides us a useful and clear arbitrary
1013 value, and it consumes no memory -- the pointers to the same
1014 string "1" will be shared by all the key-value pairs in all `set'
1016 hash_table_put (ht, xstrdup (s), "1");
1019 /* Synonym for hash_table_contains... */
1022 string_set_contains (struct hash_table *ht, const char *s)
1024 return hash_table_contains (ht, s);
1028 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1035 string_set_free (struct hash_table *ht)
1037 hash_table_map (ht, string_set_free_mapper, NULL);
1038 hash_table_destroy (ht);
1042 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1049 /* Another utility function: call free() on all keys and values of HT. */
1052 free_keys_and_values (struct hash_table *ht)
1054 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1058 /* Engine for legible and legible_large_int; add thousand separators
1059 to numbers printed in strings. */
1062 legible_1 (const char *repr)
1064 static char outbuf[48];
1069 /* Reset the pointers. */
1073 /* Ignore the sign for the purpose of adding thousand
1080 /* How many digits before the first separator? */
1081 mod = strlen (inptr) % 3;
1083 for (i = 0; i < mod; i++)
1084 *outptr++ = inptr[i];
1085 /* Now insert the rest of them, putting separator before every
1087 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1089 if (i % 3 == 0 && i1 != 0)
1091 *outptr++ = inptr[i1];
1093 /* Zero-terminate the string. */
1098 /* Legible -- return a static pointer to the legibly printed long. */
1104 /* Print the number into the buffer. */
1105 number_to_string (inbuf, l);
1106 return legible_1 (inbuf);
1109 /* Write a string representation of LARGE_INT NUMBER into the provided
1110 buffer. The buffer should be able to accept 24 characters,
1111 including the terminating zero.
1113 It would be dangerous to use sprintf, because the code wouldn't
1114 work on a machine with gcc-provided long long support, but without
1115 libc support for "%lld". However, such platforms will typically
1116 not have snprintf and will use our version, which does support
1117 "%lld" where long longs are available. */
1120 large_int_to_string (char *buffer, LARGE_INT number)
1122 snprintf (buffer, 24, LARGE_INT_FMT, number);
1125 /* The same as legible(), but works on LARGE_INT. */
1128 legible_large_int (LARGE_INT l)
1131 large_int_to_string (inbuf, l);
1132 return legible_1 (inbuf);
1135 /* Count the digits in a (long) integer. */
1137 numdigit (long number)
1145 while ((number /= 10) > 0)
1150 /* A half-assed implementation of INT_MAX on machines that don't
1151 bother to define one. */
1153 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
1156 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1157 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1159 #define DIGITS_1(figure) ONE_DIGIT (figure)
1160 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1161 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1162 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1163 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1164 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1165 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1166 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1167 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1168 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1170 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1172 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1173 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1174 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1175 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1176 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1177 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1178 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1179 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1180 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1182 /* Print NUMBER to BUFFER in base 10. This should be completely
1183 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1185 The speedup may make a difference in programs that frequently
1186 convert numbers to strings. Some implementations of sprintf,
1187 particularly the one in GNU libc, have been known to be extremely
1188 slow compared to this function.
1190 Return the pointer to the location where the terminating zero was
1191 printed. (Equivalent to calling buffer+strlen(buffer) after the
1194 BUFFER should be big enough to accept as many bytes as you expect
1195 the number to take up. On machines with 64-bit longs the maximum
1196 needed size is 24 bytes. That includes the digits needed for the
1197 largest 64-bit number, the `-' sign in case it's negative, and the
1198 terminating '\0'. */
1201 number_to_string (char *buffer, long number)
1206 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1207 /* We are running in a strange or misconfigured environment. Let
1208 sprintf cope with it. */
1209 sprintf (buffer, "%ld", n);
1210 p += strlen (buffer);
1211 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1217 /* We cannot print a '-' and assign -n to n because -n would
1218 overflow. Let sprintf deal with this border case. */
1219 sprintf (buffer, "%ld", n);
1220 p += strlen (buffer);
1228 if (n < 10) { DIGITS_1 (1); }
1229 else if (n < 100) { DIGITS_2 (10); }
1230 else if (n < 1000) { DIGITS_3 (100); }
1231 else if (n < 10000) { DIGITS_4 (1000); }
1232 else if (n < 100000) { DIGITS_5 (10000); }
1233 else if (n < 1000000) { DIGITS_6 (100000); }
1234 else if (n < 10000000) { DIGITS_7 (1000000); }
1235 else if (n < 100000000) { DIGITS_8 (10000000); }
1236 else if (n < 1000000000) { DIGITS_9 (100000000); }
1237 #if SIZEOF_LONG == 4
1238 /* ``if (1)'' serves only to preserve editor indentation. */
1239 else if (1) { DIGITS_10 (1000000000); }
1240 #else /* SIZEOF_LONG != 4 */
1241 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1242 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1243 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1244 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1245 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1246 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1247 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1248 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1249 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1250 else { DIGITS_19 (1000000000000000000L); }
1251 #endif /* SIZEOF_LONG != 4 */
1254 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1260 #undef ONE_DIGIT_ADVANCE
1282 /* Support for timers. */
1284 #undef TIMER_WINDOWS
1285 #undef TIMER_GETTIMEOFDAY
1288 /* Depending on the OS and availability of gettimeofday(), one and
1289 only one of the above constants will be defined. Virtually all
1290 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1291 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1292 non-Windows systems without gettimeofday.
1294 #### Perhaps we should also support ftime(), which exists on old
1295 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1296 C, if memory serves me.) */
1299 # define TIMER_WINDOWS
1300 #else /* not WINDOWS */
1301 # ifdef HAVE_GETTIMEOFDAY
1302 # define TIMER_GETTIMEOFDAY
1306 #endif /* not WINDOWS */
1308 #ifdef TIMER_GETTIMEOFDAY
1309 typedef struct timeval wget_sys_time;
1313 typedef time_t wget_sys_time;
1316 #ifdef TIMER_WINDOWS
1317 typedef ULARGE_INTEGER wget_sys_time;
1321 /* The starting point in time which, subtracted from the current
1322 time, yields elapsed time. */
1323 wget_sys_time start;
1325 /* The most recent elapsed time, calculated by wtimer_elapsed().
1326 Measured in milliseconds. */
1327 double elapsed_last;
1329 /* Approximately, the time elapsed between the true start of the
1330 measurement and the time represented by START. */
1331 double elapsed_pre_start;
1334 /* Allocate a timer. It is not legal to do anything with a freshly
1335 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1338 wtimer_allocate (void)
1340 struct wget_timer *wt = xnew (struct wget_timer);
1344 /* Allocate a new timer and reset it. Return the new timer. */
1349 struct wget_timer *wt = wtimer_allocate ();
1354 /* Free the resources associated with the timer. Its further use is
1358 wtimer_delete (struct wget_timer *wt)
1363 /* Store system time to WST. */
1366 wtimer_sys_set (wget_sys_time *wst)
1368 #ifdef TIMER_GETTIMEOFDAY
1369 gettimeofday (wst, NULL);
1376 #ifdef TIMER_WINDOWS
1377 /* We use GetSystemTime to get the elapsed time. MSDN warns that
1378 system clock adjustments can skew the output of GetSystemTime
1379 when used as a timer and gives preference to GetTickCount and
1380 high-resolution timers. But GetTickCount can overflow, and hires
1381 timers are typically used for profiling, not for regular time
1382 measurement. Since we handle clock skew anyway, we just use
1386 GetSystemTime (&st);
1388 /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1389 FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1390 arithmetic on that. */
1391 SystemTimeToFileTime (&st, &ft);
1392 wst->HighPart = ft.dwHighDateTime;
1393 wst->LowPart = ft.dwLowDateTime;
1397 /* Reset timer WT. This establishes the starting point from which
1398 wtimer_elapsed() will return the number of elapsed milliseconds.
1399 It is allowed to reset a previously used timer.
1401 If a non-zero value is used as START, the timer's values will be
1405 wtimer_reset (struct wget_timer *wt)
1407 /* Set the start time to the current time. */
1408 wtimer_sys_set (&wt->start);
1409 wt->elapsed_last = 0;
1410 wt->elapsed_pre_start = 0;
1414 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1416 #ifdef TIMER_GETTIMEOFDAY
1417 return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1418 + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1422 return 1000 * (*wst1 - *wst2);
1426 /* VC++ 6 doesn't support direct cast of uint64 to double. To work
1427 around this, we subtract, then convert to signed, then finally to
1429 return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1433 /* Update the timer's elapsed interval. This function causes the
1434 timer to call gettimeofday (or time(), etc.) to update its idea of
1435 current time. To get the elapsed interval in milliseconds, use
1438 This function handles clock skew, i.e. time that moves backwards is
1442 wtimer_update (struct wget_timer *wt)
1447 wtimer_sys_set (&now);
1448 elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1450 /* Ideally we'd just return the difference between NOW and
1451 wt->start. However, the system timer can be set back, and we
1452 could return a value smaller than when we were last called, even
1453 a negative value. Both of these would confuse the callers, which
1454 expect us to return monotonically nondecreasing values.
1456 Therefore: if ELAPSED is smaller than its previous known value,
1457 we reset wt->start to the current time and effectively start
1458 measuring from this point. But since we don't want the elapsed
1459 value to start from zero, we set elapsed_pre_start to the last
1460 elapsed time and increment all future calculations by that amount. */
1463 if (elapsed < wt->elapsed_last)
1466 wt->elapsed_pre_start = wt->elapsed_last;
1467 elapsed = wt->elapsed_last;
1470 wt->elapsed_last = elapsed;
1473 /* Return the elapsed time in milliseconds between the last call to
1474 wtimer_reset and the last call to wtimer_update.
1476 A typical use of the timer interface would be:
1478 struct wget_timer *timer = wtimer_new ();
1479 ... do something that takes a while ...
1480 wtimer_update (timer);
1481 double msecs = wtimer_read (timer); */
1484 wtimer_read (const struct wget_timer *wt)
1486 return wt->elapsed_last; /* Value stored by the most recent wtimer_update, or 0 right after wtimer_reset; no clock is consulted here. */
1489 /* Return the assessed granularity of the timer implementation, in
1490 milliseconds. This is used by code that tries to substitute a
1491 better value for timers that have returned zero. */
1494 wtimer_granularity (void)
1496 #ifdef TIMER_GETTIMEOFDAY
1497 /* Granularity of gettimeofday varies wildly between architectures.
1498 However, it appears that on modern machines it tends to be better
1499 than 1ms. Assume 100 usecs. (Perhaps the configure process
1500 could actually measure this?) */
1508 #ifdef TIMER_WINDOWS
1509 /* According to MSDN, GetSystemTime returns a broken-down time
1510 structure the smallest member of which are milliseconds. */
1515 /* This should probably be at a better place, but it doesn't really
1516 fit into html-parse.c. */
1518 /* The function returns the pointer to the malloc-ed quoted version of
1519 string s. It will recognize and quote numeric and special graphic
1520 entities, as per RFC1866:
1528 No other entities are recognized or replaced. */
/* Build a quoted copy of S in memory obtained from xmalloc, replacing
   HTML-special characters with entity references.  The work is done
   in two passes: the first computes the size of the result, the
   second writes it. */
1530 html_quote_string (const char *s)
1536 /* Pass through the string, and count the new size. */
/* The loop itself adds one byte per input character; each branch
   below adds only the extra bytes the entity needs beyond that one. */
1537 for (i = 0; *s; s++, i++)
1540 i += 4; /* `amp;' */
1541 else if (*s == '<' || *s == '>')
1542 i += 3; /* `lt;' and `gt;' */
1543 else if (*s == '\"')
1544 i += 5; /* `quot;' */
/* One byte more than the counted size (presumably for the
   terminating NUL -- the store is not on the lines shown here). */
1548 res = (char *)xmalloc (i + 1);
/* Second pass: P is the write cursor into RES. */
1550 for (p = res; *s; s++)
/* Pick the letter that tells `lt' from `gt'. */
1563 *p++ = (*s == '<' ? 'l' : 'g');
1590 /* Determine the width of the terminal we're running on. If that's
1591 not possible, return 0. */
1594 determine_screen_width (void)
1596 /* If there's a way to get the terminal size using POSIX
1597 tcgetattr(), somebody please tell me. */
1600 #else /* TIOCGWINSZ */
1604 if (opt.lfilename != NULL)
/* The window size is queried on the descriptor behind stderr. */
1607 fd = fileno (stderr);
/* Ask for the window size via the TIOCGWINSZ ioctl; if the request
   fails, report "unknown" by returning 0. */
1608 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1609 return 0; /* most likely ENOTTY */
1612 #endif /* TIOCGWINSZ */
1615 /* Return a random number between 0 and MAX-1, inclusive.
1617 If MAX is greater than the value of RAND_MAX+1 on the system, the
1618 returned value will be in the range [0, RAND_MAX]. This may be
1619 fixed in a future release.
1621 The random number generator is seeded automatically the first time random_number is called.
1624 This uses rand() for portability. It has been suggested that
1625 random() offers better randomness, but this is not required for
1626 Wget, so I chose to go for simplicity and use rand().
1629 DO NOT use this for cryptographic purposes. It is only meant to be
1630 used in situations where quality of the random numbers returned
1631 doesn't really matter. */
1634 random_number (int max)
/* Seed the generator from the current time. */
1642 srand (time (NULL));
1647 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1648 and enforce that assumption by masking other bits. */
1650 # define RAND_MAX 32767
1654 /* This is equivalent to rand() % max, but uses the high-order bits
1655 for better randomness on architectures where rand() is implemented
1656 using a simple congruential generator. */
/* Scale RND into [0, MAX): dividing by RAND_MAX + 1.0 (a double)
   keeps the quotient strictly below 1 as long as RND does not exceed
   RAND_MAX (assumes RND holds the rand() result -- its assignment is
   not on the lines shown here). */
1658 bounded = (double)max * rnd / (RAND_MAX + 1.0);
/* The cast truncates the fractional part. */
1659 return (int)bounded;
1662 /* Return a random uniformly distributed floating point number in the
1663 [0, 1) range. The precision of returned numbers is 9 digits.
1665 Modify this to use erand48() where available! */
1670 /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1671 sure it's greater than 1000. */
1672 int rnd1 = random_number (1000);
1673 int rnd2 = random_number (1000);
1674 int rnd3 = random_number (1000);
1675 return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1679 /* A debugging function for checking whether an MD5 library works. */
1681 #include "gen-md5.h"
/* Hash the NUL-terminated string BUF with the gen_md5 interface and
   render the digest as hexadecimal text. */
1684 debug_test_md5 (char *buf)
/* Binary digest, filled in by gen_md5_finish below. */
1686 unsigned char raw[16];
/* Static buffer for the hex text: two digits per byte of RAW plus
   one more byte (presumably the terminating NUL). */
1687 static char res[33];
1691 ALLOCA_MD5_CONTEXT (ctx);
/* Feed BUF up to, but not including, its terminating NUL. */
1694 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1695 gen_md5_finish (ctx, raw);
/* Emit the high nibble of the current byte, then the low nibble
   (XNUM_TO_digit presumably maps 0-15 to a hex digit character). */
1702 *p2++ = XNUM_TO_digit (*p1 >> 4);
1703 *p2++ = XNUM_TO_digit (*p1 & 0xf);
1712 /* Implementation of run_with_timeout, a generic timeout-forcing
1713 routine for systems with Unix-like signal handling. */
1715 #ifdef USE_SIGNAL_TIMEOUT
1716 # ifdef HAVE_SIGSETJMP
/* SETJMP records the point to return to on timeout.  The non-zero
   second argument asks sigsetjmp to save the signal mask too, so
   that siglongjmp restores it. */
1717 # define SETJMP(env) sigsetjmp (env, 1)
/* Jump buffer shared by run_with_timeout and the SIGALRM handler. */
1719 static sigjmp_buf run_with_timeout_env;
/* SIGALRM handler: abandon whatever was running and resume at the
   SETJMP call in run_with_timeout, which then sees the value -1. */
1722 abort_run_with_timeout (int sig)
1724 assert (sig == SIGALRM);
1725 siglongjmp (run_with_timeout_env, -1);
1727 # else /* not HAVE_SIGSETJMP */
/* Plain setjmp fallback; the handler below has to take care of the
   blocked-signal mask itself. */
1728 # define SETJMP(env) setjmp (env)
/* Jump buffer shared by run_with_timeout and the SIGALRM handler. */
1730 static jmp_buf run_with_timeout_env;
/* SIGALRM handler, setjmp/longjmp variant: same job as the
   sigsetjmp version above. */
1733 abort_run_with_timeout (int sig)
1735 assert (sig == SIGALRM);
1736 /* We don't have siglongjmp to preserve the set of blocked signals;
1737 if we longjumped out of the handler at this point, SIGALRM would
1738 remain blocked. We must unblock it manually. */
/* Fetch the current mask of blocked signals and clear the SIGALRM
   bit in it. */
1739 int mask = siggetmask ();
1740 mask &= ~sigmask (SIGALRM);
1743 /* Now it's safe to longjump. */
1744 longjmp (run_with_timeout_env, -1);
1746 # endif /* not HAVE_SIGSETJMP */
1748 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses
1749 setitimer where available, alarm otherwise.
1751 TIMEOUT should be non-zero. If the timeout value is so small that
1752 it would be rounded to zero, it is rounded to the least legal value
1753 instead (1us for setitimer, 1s for alarm). That ensures that
1754 SIGALRM will be delivered in all cases. */
1757 alarm_set (double timeout)
1760 /* Use the modern itimer interface. */
1761 struct itimerval itv;
1763 itv.it_value.tv_sec = (long) timeout; /* Whole seconds of TIMEOUT. */
1764 itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout); /* Fractional part of TIMEOUT, scaled to microseconds. */
1765 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1766 /* Ensure that we wait for at least the minimum interval.
1767 Specifying zero would mean "wait forever". */
1768 itv.it_value.tv_usec = 1;
1769 setitimer (ITIMER_REAL, &itv, NULL); /* Arm the real-time timer; the previous setting is not requested (NULL). */
1770 #else /* not ITIMER_REAL */
1771 /* Use the old alarm() interface. */
1772 int secs = (int) timeout; /* Only whole seconds are kept; the fractional part is truncated. */
1774 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is
1775 because alarm(0) means "never deliver the alarm", i.e. "wait
1776 forever", which is not what someone who specifies a 0.5s
1777 timeout would expect. */
1780 #endif /* not ITIMER_REAL */
1783 /* Cancel the alarm set with alarm_set. */
1789 struct itimerval disable;
1791 setitimer (ITIMER_REAL, &disable, NULL);
1792 #else /* not ITIMER_REAL */
1794 #endif /* not ITIMER_REAL */
1797 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1798 seconds. Returns non-zero if the function was interrupted with a
1799 timeout, zero otherwise.
1801 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1802 using setitimer() or alarm(). The timeout is enforced by
1803 longjumping out of the SIGALRM handler. This has several
1804 advantages compared to the traditional approach of relying on
1805 signals causing system calls to exit with EINTR:
1807 * The callback function is *forcibly* interrupted after the
1808 timeout expires, (almost) regardless of what it was doing and
1809 whether it was in a syscall. For example, a calculation that
1810 takes a long time is interrupted as reliably as an IO operation.
1813 * It works with both SYSV and BSD signals because it doesn't
1814 depend on the default setting of SA_RESTART.
1816 * It doesn't require special handler setup beyond a simple call to
1817 signal(). (It does use sigsetjmp/siglongjmp, but they're optional.)
1820 The only downside is that, if FUN allocates internal resources that
1821 are normally freed prior to exit from the functions, they will be
1822 lost in case of timeout. */
1825 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
/* Install the handler that longjumps back to the SETJMP below when
   SIGALRM arrives. */
1835 signal (SIGALRM, abort_run_with_timeout);
/* SETJMP yields 0 when called directly and a non-zero value when
   control comes back through the handler's longjmp. */
1836 if (SETJMP (run_with_timeout_env) != 0)
1838 /* Longjumped out of FUN with a timeout. */
1839 signal (SIGALRM, SIG_DFL); /* Put the default SIGALRM disposition back. */
1842 alarm_set (timeout); /* Start the countdown. */
1845 /* Preserve errno in case alarm() or signal() modifies it. */
1846 saved_errno = errno;
1848 signal (SIGALRM, SIG_DFL); /* No timeout occurred: put the default SIGALRM disposition back. */
1849 errno = saved_errno;
1854 #else /* not USE_SIGNAL_TIMEOUT */
1857 /* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1858 define it under Windows, because Windows has its own version of
1859 run_with_timeout that uses threads. */
1862 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1867 #endif /* not WINDOWS */
1868 #endif /* not USE_SIGNAL_TIMEOUT */
1872 /* Sleep the specified amount of seconds. On machines without
1873 nanosleep(), this may sleep shorter if interrupted by signals. */
1876 xsleep (double seconds)
1878 #ifdef HAVE_NANOSLEEP
1879 /* nanosleep is the preferred interface because it offers high
1880 accuracy and, more importantly, because it allows us to reliably
1881 restart after having been interrupted by a signal. */
1883 struct timespec sleep, remaining;
1884 sleep.tv_sec = (long) seconds;
1885 sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);
1886 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1887 /* If nanosleep has been interrupted by a signal, adjust the
1888 sleeping period and return to sleep. */
1890 #else /* not HAVE_NANOSLEEP */
1892 /* If usleep is available, use it in preference to select. */
1895 /* usleep apparently accepts unsigned long, which means it can't
1896 sleep longer than ~70 min (35min if signed). If the period
1897 is larger than what usleep can safely handle, use sleep
1898 first, then add usleep for subsecond accuracy. */
1900 seconds -= (long) seconds;
1902 usleep (seconds * 1000000L);
1903 #else /* not HAVE_USLEEP */
1905 struct timeval sleep;
1906 sleep.tv_sec = (long) seconds;
1907 sleep.tv_usec = 1000000L * (seconds - (long) seconds);
1908 select (0, NULL, NULL, NULL, &sleep);
1909 /* If select returns -1 and errno is EINTR, it means we were
1910 interrupted by a signal. But without knowing how long we've
1911 actually slept, we can't return to sleep. Using gettimeofday to
1912 track sleeps is slow and unreliable due to clock skew. */
1913 #else /* not HAVE_SELECT */
1915 #endif /* not HAVE_SELECT */
1916 #endif /* not HAVE_USLEEP */
1917 #endif /* not HAVE_NANOSLEEP */
1920 #endif /* not WINDOWS */