1 /* Various utility functions.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
36 #else /* not HAVE_STRING_H */
38 #endif /* not HAVE_STRING_H */
39 #include <sys/types.h>
44 # include <sys/mman.h>
53 #ifdef HAVE_SYS_UTIME_H
54 # include <sys/utime.h>
58 # include <libc.h> /* for access() */
63 /* For TIOCGWINSZ and friends: */
64 #ifdef HAVE_SYS_IOCTL_H
65 # include <sys/ioctl.h>
71 /* Needed for run_with_timeout. */
72 #undef USE_SIGNAL_TIMEOUT
80 #ifndef HAVE_SIGSETJMP
81 /* If sigsetjmp is a macro, configure won't pick it up. */
83 # define HAVE_SIGSETJMP
88 # ifdef HAVE_SIGSETJMP
89 # define USE_SIGNAL_TIMEOUT
92 # define USE_SIGNAL_TIMEOUT
104 /* Utility function: like xstrdup(), but also lowercases S. */
107 xstrdup_lower (const char *s)
109 char *copy = xstrdup (s);
116 /* Return a count of how many times CHR occurs in STRING. */
119 count_char (const char *string, char chr)
123 for (p = string; *p; p++)
129 /* Copy the string formed by two pointers (one on the beginning, other
130 on the char after the last char) to a new, malloc-ed location.
133 strdupdelim (const char *beg, const char *end)
135 char *res = (char *)xmalloc (end - beg + 1);
136 memcpy (res, beg, end - beg);
137 res[end - beg] = '\0';
141 /* Parse a string containing comma-separated elements, and return a
142 vector of char pointers with the elements. Spaces following the
143 commas are ignored. */
145 sepstring (const char *s)
159 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
160 res[i] = strdupdelim (p, s);
163 /* Skip the blanks following the ','. */
171 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
172 res[i] = strdupdelim (p, s);
177 /* Return pointer to a static char[] buffer in which zero-terminated
178 string-representation of TM (in form hh:mm:ss) is printed.
180 If TM is NULL, the current time will be used. */
183 time_str (time_t *tm)
185 static char output[15];
187 time_t secs = tm ? *tm : time (NULL);
191 /* In case of error, return the empty string. Maybe we should
192 just abort if this happens? */
196 ptm = localtime (&secs);
197 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
201 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
204 datetime_str (time_t *tm)
206 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
208 time_t secs = tm ? *tm : time (NULL);
212 /* In case of error, return the empty string. Maybe we should
213 just abort if this happens? */
217 ptm = localtime (&secs);
218 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
219 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
220 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
224 /* The Windows versions of the following two functions are defined in
229 fork_to_background (void)
232 /* Whether we arrange our own version of opt.lfilename here. */
237 opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
249 /* parent, no error */
250 printf (_("Continuing in background, pid %d.\n"), (int)pid);
252 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
253 exit (0); /* #### should we use _exit()? */
256 /* child: give up the privileges and keep running. */
258 freopen ("/dev/null", "r", stdin);
259 freopen ("/dev/null", "w", stdout);
260 freopen ("/dev/null", "w", stderr);
262 #endif /* not WINDOWS */
264 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
265 specified with TM. */
267 touch (const char *file, time_t tm)
269 #ifdef HAVE_STRUCT_UTIMBUF
270 struct utimbuf times;
271 times.actime = times.modtime = tm;
274 times[0] = times[1] = tm;
277 if (utime (file, ×) == -1)
278 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
281 /* Checks if FILE is a symbolic link, and removes it if it is. Does
282 nothing under MS-Windows. */
284 remove_link (const char *file)
289 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
291 DEBUGP (("Unlinking %s (symlink).\n", file));
294 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
295 file, strerror (errno));
300 /* Does FILENAME exist? This is quite a lousy implementation, since
301 it supplies no error codes -- only a yes-or-no answer. Thus it
302 will return that a file does not exist if, e.g., the directory is
303 unreadable. I don't mind it too much currently, though. The
304 proper way should, of course, be to have a third, error state,
305 other than true/false, but that would introduce uncalled-for
306 additional complexity to the callers. */
308 file_exists_p (const char *filename)
311 return access (filename, F_OK) >= 0;
314 return stat (filename, &buf) >= 0;
318 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
319 Returns 0 on error. */
321 file_non_directory_p (const char *path)
324 /* Use lstat() rather than stat() so that symbolic links pointing to
325 directories can be identified correctly. */
326 if (lstat (path, &buf) != 0)
328 return S_ISDIR (buf.st_mode) ? 0 : 1;
331 /* Return the size of file named by FILENAME, or -1 if it cannot be
332 opened or seeked into. */
334 file_size (const char *filename)
337 /* We use fseek rather than stat to determine the file size because
338 that way we can also verify whether the file is readable.
339 Inspired by the POST patch by Arnaud Wylie. */
340 FILE *fp = fopen (filename, "rb");
343 fseek (fp, 0, SEEK_END);
349 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
350 doesn't exist is found. Return a freshly allocated copy of the
354 unique_name_1 (const char *prefix)
357 int plen = strlen (prefix);
358 char *template = (char *)alloca (plen + 1 + 24);
359 char *template_tail = template + plen;
361 memcpy (template, prefix, plen);
362 *template_tail++ = '.';
365 number_to_string (template_tail, count++);
366 while (file_exists_p (template));
368 return xstrdup (template);
371 /* Return a unique file name, based on FILE.
373 More precisely, if FILE doesn't exist, it is returned unmodified.
374 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
375 file name that doesn't exist is returned.
377 The resulting file is not created, only verified that it didn't
378 exist at the point in time when the function was called.
379 Therefore, where security matters, don't rely that the file created
380 by this function exists until you open it with O_EXCL or
383 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
384 string. Otherwise, it may return FILE if the file doesn't exist
385 (and therefore doesn't need changing). */
388 unique_name (const char *file, int allow_passthrough)
390 /* If the FILE itself doesn't exist, return it without
392 if (!file_exists_p (file))
393 return allow_passthrough ? (char *)file : xstrdup (file);
395 /* Otherwise, find a numeric suffix that results in unused file name
397 return unique_name_1 (file);
400 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
401 are missing, create them first. In case any mkdir() call fails,
402 return its error status. Returns 0 on successful completion.
404 The behaviour of this function should be identical to the behaviour
405 of `mkdir -p' on systems where mkdir supports the `-p' option. */
407 make_directory (const char *directory)
414 /* Make a copy of dir, to be able to write to it. Otherwise, the
415 function is unsafe if called with a read-only char *argument. */
416 STRDUP_ALLOCA (dir, directory);
418 /* If the first character of dir is '/', skip it (and thus enable
419 creation of absolute-pathname directories). */
420 for (i = (*dir == '/'); 1; ++i)
422 for (; dir[i] && dir[i] != '/'; i++)
427 /* Check whether the directory already exists. Allow creation of
428 intermediate directories to fail, as the initial path components
429 are not necessarily directories! */
430 if (!file_exists_p (dir))
431 ret = mkdir (dir, 0777);
442 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
443 should be a file name.
445 file_merge("/foo/bar", "baz") => "/foo/baz"
446 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
447 file_merge("foo", "bar") => "bar"
449 In other words, it's a simpler and gentler version of uri_merge_1. */
452 file_merge (const char *base, const char *file)
455 const char *cut = (const char *)strrchr (base, '/');
458 return xstrdup (file);
460 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
461 memcpy (result, base, cut - base);
462 result[cut - base] = '/';
463 strcpy (result + (cut - base) + 1, file);
468 static int in_acclist PARAMS ((const char *const *, const char *, int));
470 /* Determine whether a file is acceptable to be followed, according to
471 lists of patterns to accept/reject. */
473 acceptable (const char *s)
477 while (l && s[l] != '/')
484 return (in_acclist ((const char *const *)opt.accepts, s, 1)
485 && !in_acclist ((const char *const *)opt.rejects, s, 1));
487 return in_acclist ((const char *const *)opt.accepts, s, 1);
489 else if (opt.rejects)
490 return !in_acclist ((const char *const *)opt.rejects, s, 1);
494 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
495 `/something', frontcmp() will return 1 only if S2 begins with
496 `/something'. Otherwise, 0 is returned. */
498 frontcmp (const char *s1, const char *s2)
500 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
504 /* Iterate through STRLIST, and return the first element that matches
505 S, through wildcards or front comparison (as appropriate). */
507 proclist (char **strlist, const char *s, enum accd flags)
511 for (x = strlist; *x; x++)
512 if (has_wildcards_p (*x))
514 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
519 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
526 /* Returns whether DIRECTORY is acceptable for download, wrt the
527 include/exclude lists.
529 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
530 and absolute paths may be freely intermixed. */
532 accdir (const char *directory, enum accd flags)
534 /* Remove starting '/'. */
535 if (flags & ALLABS && *directory == '/')
539 if (!proclist (opt.includes, directory, flags))
544 if (proclist (opt.excludes, directory, flags))
550 /* Return non-zero if STRING ends with TAIL. For instance:
552 match_tail ("abc", "bc", 0) -> 1
553 match_tail ("abc", "ab", 0) -> 0
554 match_tail ("abc", "abc", 0) -> 1
556 If FOLD_CASE_P is non-zero, the comparison will be
560 match_tail (const char *string, const char *tail, int fold_case_p)
564 /* We want this to be fast, so we code two loops, one with
565 case-folding, one without. */
569 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
570 if (string[i] != tail[j])
575 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
576 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
580 /* If the tail was exhausted, the match was successful. */
587 /* Checks whether string S matches each element of ACCEPTS. A list
588 element is matched either with fnmatch() or match_tail(),
589 according to whether the element contains wildcards or not.
591 If BACKWARD is 0, don't do backward comparison -- just compare
594 in_acclist (const char *const *accepts, const char *s, int backward)
596 for (; *accepts; accepts++)
598 if (has_wildcards_p (*accepts))
600 /* fnmatch returns 0 if the pattern *does* match the
602 if (fnmatch (*accepts, s, 0) == 0)
609 if (match_tail (s, *accepts, 0))
614 if (!strcmp (s, *accepts))
622 /* Return the location of STR's suffix (file extension). Examples:
623 suffix ("foo.bar") -> "bar"
624 suffix ("foo.bar.baz") -> "baz"
625 suffix ("/foo/bar") -> NULL
626 suffix ("/foo.bar/baz") -> NULL */
628 suffix (const char *str)
632 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
636 return (char *)str + i;
641 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
645 has_wildcards_p (const char *s)
648 if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
653 /* Return non-zero if FNAME ends with a typical HTML suffix. The
654 following (case-insensitive) suffixes are presumed to be HTML files:
658 ?html (`?' matches one character)
660 #### CAVEAT. This is not necessarily a good indication that FNAME
661 refers to a file that contains HTML! */
663 has_html_suffix_p (const char *fname)
667 if ((suf = suffix (fname)) == NULL)
669 if (!strcasecmp (suf, "html"))
671 if (!strcasecmp (suf, "htm"))
673 if (suf[0] && !strcasecmp (suf + 1, "html"))
678 /* Read a line from FP and return the pointer to freshly allocated
679 storage. The storage space is obtained through malloc() and should
680 be freed with free() when it is no longer needed.
682 The length of the line is not limited, except by available memory.
683 The newline character at the end of line is retained. The line is
684 terminated with a zero character.
686 After end-of-file is encountered without anything being read, NULL
687 is returned. NULL is also returned on error. To distinguish
688 between these two cases, use the stdio function ferror(). */
691 read_whole_line (FILE *fp)
695 char *line = (char *)xmalloc (bufsize);
697 while (fgets (line + length, bufsize - length, fp))
699 length += strlen (line + length);
701 /* Possible for example when reading from a binary file where
702 a line begins with \0. */
705 if (line[length - 1] == '\n')
708 /* fgets() guarantees to read the whole line, or to use up the
709 space we've given it. We can double the buffer
712 line = xrealloc (line, bufsize);
714 if (length == 0 || ferror (fp))
719 if (length + 1 < bufsize)
720 /* Relieve the memory from our exponential greediness. We say
721 `length + 1' because the terminating \0 is not included in
722 LENGTH. We don't need to zero-terminate the string ourselves,
723 though, because fgets() does that. */
724 line = xrealloc (line, length + 1);
728 /* Read FILE into memory. A pointer to `struct file_memory' is
729 returned; use struct element `content' to access file contents, and
730 the element `length' to know the file length. `content' is *not*
731 zero-terminated, and you should *not* read or write beyond the [0,
732 length) range of characters.
734 After you are done with the file contents, call read_file_free to
737 Depending on the operating system and the type of file that is
738 being read, read_file() either mmap's the file into memory, or
739 reads the file into the core using read().
741 If file is named "-", fileno(stdin) is used for reading instead.
742 If you want to read from a real file named "-", use "./-" instead. */
745 read_file (const char *file)
748 struct file_memory *fm;
750 int inhibit_close = 0;
752 /* Some magic in the finest tradition of Perl and its kin: if FILE
753 is "-", just use stdin. */
758 /* Note that we don't inhibit mmap() in this case. If stdin is
759 redirected from a regular file, mmap() will still work. */
762 fd = open (file, O_RDONLY);
765 fm = xnew (struct file_memory);
770 if (fstat (fd, &buf) < 0)
772 fm->length = buf.st_size;
773 /* NOTE: As far as I know, the callers of this function never
774 modify the file text. Relying on this would enable us to
775 specify PROT_READ and MAP_SHARED for a marginal gain in
776 efficiency, but at some cost to generality. */
777 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
779 if (fm->content == (char *)MAP_FAILED)
789 /* The most common reason why mmap() fails is that FD does not point
790 to a plain file. However, it's also possible that mmap() doesn't
791 work for a particular type of file. Therefore, whenever mmap()
792 fails, we just fall back to the regular method. */
793 #endif /* HAVE_MMAP */
796 size = 512; /* number of bytes fm->content can
797 hold at any given time. */
798 fm->content = xmalloc (size);
802 if (fm->length > size / 2)
804 /* #### I'm not sure whether the whole exponential-growth
805 thing makes sense with kernel read. On Linux at least,
806 read() refuses to read more than 4K from a file at a
807 single chunk anyway. But other Unixes might optimize it
808 better, and it doesn't *hurt* anything, so I'm leaving
811 /* Normally, we grow SIZE exponentially to make the number
812 of calls to read() and realloc() logarithmic in relation
813 to file size. However, read() can read an amount of data
814 smaller than requested, and it would be unreasonable to
815 double SIZE every time *something* was read. Therefore,
816 we double SIZE only when the length exceeds half of the
817 entire allocated size. */
819 fm->content = xrealloc (fm->content, size);
821 nread = read (fd, fm->content + fm->length, size - fm->length);
823 /* Successful read. */
834 if (size > fm->length && fm->length != 0)
835 /* Due to exponential growth of fm->content, the allocated region
836 might be much larger than what is actually needed. */
837 fm->content = xrealloc (fm->content, fm->length);
849 /* Release the resources held by FM. Specifically, this calls
850 munmap() or xfree() on fm->content, depending on whether mmap or
851 malloc/read were used to read in the file. It also frees the
852 memory needed to hold the FM structure itself. */
855 read_file_free (struct file_memory *fm)
860 munmap (fm->content, fm->length);
870 /* Free the pointers in a NULL-terminated vector of pointers, then
871 free the pointer itself. */
873 free_vec (char **vec)
884 /* Append vector V2 to vector V1. The function frees V2 and
885 reallocates V1 (thus you may not use the contents of either
886 pointer after the call). If V1 is NULL, V2 is returned. */
888 merge_vecs (char **v1, char **v2)
898 /* To avoid j == 0 */
903 for (i = 0; v1[i]; i++);
905 for (j = 0; v2[j]; j++);
907 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
908 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
913 /* A set of simple-minded routines to store strings in a linked list.
914 This used to also be used for searching, but now we have hash
917 /* It's a shame that these simple things like linked lists and hash
918 tables (see hash.c) need to be implemented over and over again. It
919 would be nice to be able to use the routines from glib -- see
920 www.gtk.org for details. However, that would make Wget depend on
921 glib, and I want to avoid dependencies to external libraries for
922 reasons of convenience and portability (I suspect Wget is more
923 portable than anything ever written for Gnome). */
925 /* Append an element to the list. If the list has a huge number of
926 elements, this can get slow because it has to find the list's
927 ending. If you think you have to call slist_append in a loop,
928 think about calling slist_prepend() followed by slist_nreverse(). */
931 slist_append (slist *l, const char *s)
933 slist *newel = xnew (slist);
936 newel->string = xstrdup (s);
941 /* Find the last element. */
948 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
951 slist_prepend (slist *l, const char *s)
953 slist *newel = xnew (slist);
954 newel->string = xstrdup (s);
959 /* Destructively reverse L. */
962 slist_nreverse (slist *l)
967 slist *next = l->next;
975 /* Is there a specific entry in the list? */
977 slist_contains (slist *l, const char *s)
979 for (; l; l = l->next)
980 if (!strcmp (l->string, s))
985 /* Free the whole slist. */
987 slist_free (slist *l)
998 /* Sometimes it's useful to create "sets" of strings, i.e. special
999 hash tables where you want to store strings as keys and merely
1000 query for their existence. Here is a set of utility routines that
1001 makes that transparent. */
1004 string_set_add (struct hash_table *ht, const char *s)
1006 /* First check whether the set element already exists. If it does,
1007 do nothing so that we don't have to free() the old element and
1008 then strdup() a new one. */
1009 if (hash_table_contains (ht, s))
1012 /* We use "1" as value. It provides us a useful and clear arbitrary
1013 value, and it consumes no memory -- the pointers to the same
1014 string "1" will be shared by all the key-value pairs in all `set'
1016 hash_table_put (ht, xstrdup (s), "1");
1019 /* Synonym for hash_table_contains... */
1022 string_set_contains (struct hash_table *ht, const char *s)
1024 return hash_table_contains (ht, s);
1028 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1035 string_set_free (struct hash_table *ht)
1037 hash_table_map (ht, string_set_free_mapper, NULL);
1038 hash_table_destroy (ht);
1042 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1049 /* Another utility function: call free() on all keys and values of HT. */
1052 free_keys_and_values (struct hash_table *ht)
1054 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1058 /* Engine for legible and legible_large_int; add thousand separators
1059 to numbers printed in strings. */
1062 legible_1 (const char *repr)
1064 static char outbuf[48];
1069 /* Reset the pointers. */
1073 /* Ignore the sign for the purpose of adding thousand
1080 /* How many digits before the first separator? */
1081 mod = strlen (inptr) % 3;
1083 for (i = 0; i < mod; i++)
1084 *outptr++ = inptr[i];
1085 /* Now insert the rest of them, putting separator before every
1087 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1089 if (i % 3 == 0 && i1 != 0)
1091 *outptr++ = inptr[i1];
1093 /* Zero-terminate the string. */
1098 /* Legible -- return a static pointer to the legibly printed long. */
1104 /* Print the number into the buffer. */
1105 number_to_string (inbuf, l);
1106 return legible_1 (inbuf);
1109 /* Write a string representation of LARGE_INT NUMBER into the provided
1110 buffer. The buffer should be able to accept 24 characters,
1111 including the terminating zero.
1113 It would be dangerous to use sprintf, because the code wouldn't
1114 work on a machine with gcc-provided long long support, but without
1115 libc support for "%lld". However, such platforms will typically
1116 not have snprintf and will use our version, which does support
1117 "%lld" where long longs are available. */
1120 large_int_to_string (char *buffer, LARGE_INT number)
1122 snprintf (buffer, 24, LARGE_INT_FMT, number);
1125 /* The same as legible(), but works on LARGE_INT. */
1128 legible_large_int (LARGE_INT l)
1131 large_int_to_string (inbuf, l);
1132 return legible_1 (inbuf);
1135 /* Count the digits in a (long) integer. */
1137 numdigit (long number)
1145 while ((number /= 10) > 0)
1150 /* A half-assed implementation of INT_MAX on machines that don't
1151 bother to define one. */
1153 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
1156 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1157 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1159 #define DIGITS_1(figure) ONE_DIGIT (figure)
1160 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1161 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1162 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1163 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1164 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1165 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1166 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1167 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1168 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1170 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1172 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1173 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1174 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1175 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1176 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1177 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1178 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1179 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1180 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1182 /* Print NUMBER to BUFFER in base 10. This should be completely
1183 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1185 The speedup may make a difference in programs that frequently
1186 convert numbers to strings. Some implementations of sprintf,
1187 particularly the one in GNU libc, have been known to be extremely
1188 slow compared to this function.
1190 Return the pointer to the location where the terminating zero was
1191 printed. (Equivalent to calling buffer+strlen(buffer) after the
1194 BUFFER should be big enough to accept as many bytes as you expect
1195 the number to take up. On machines with 64-bit longs the maximum
1196 needed size is 24 bytes. That includes the digits needed for the
1197 largest 64-bit number, the `-' sign in case it's negative, and the
1198 terminating '\0'. */
1201 number_to_string (char *buffer, long number)
1206 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1207 /* We are running in a strange or misconfigured environment. Let
1208 sprintf cope with it. */
1209 sprintf (buffer, "%ld", n);
1210 p += strlen (buffer);
1211 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1217 /* We cannot print a '-' and assign -n to n because -n would
1218 overflow. Let sprintf deal with this border case. */
1219 sprintf (buffer, "%ld", n);
1220 p += strlen (buffer);
1228 if (n < 10) { DIGITS_1 (1); }
1229 else if (n < 100) { DIGITS_2 (10); }
1230 else if (n < 1000) { DIGITS_3 (100); }
1231 else if (n < 10000) { DIGITS_4 (1000); }
1232 else if (n < 100000) { DIGITS_5 (10000); }
1233 else if (n < 1000000) { DIGITS_6 (100000); }
1234 else if (n < 10000000) { DIGITS_7 (1000000); }
1235 else if (n < 100000000) { DIGITS_8 (10000000); }
1236 else if (n < 1000000000) { DIGITS_9 (100000000); }
1237 #if SIZEOF_LONG == 4
1238 /* ``if (1)'' serves only to preserve editor indentation. */
1239 else if (1) { DIGITS_10 (1000000000); }
1240 #else /* SIZEOF_LONG != 4 */
1241 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1242 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1243 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1244 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1245 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1246 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1247 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1248 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1249 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1250 else { DIGITS_19 (1000000000000000000L); }
1251 #endif /* SIZEOF_LONG != 4 */
1254 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1260 #undef ONE_DIGIT_ADVANCE
1282 /* Support for timers. */
1284 #undef TIMER_WINDOWS
1285 #undef TIMER_GETTIMEOFDAY
1288 /* Depending on the OS and availability of gettimeofday(), one and
1289 only one of the above constants will be defined. Virtually all
1290 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1291 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1292 non-Windows systems without gettimeofday.
1294 #### Perhaps we should also support ftime(), which exists on old
1295 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1296 C, if memory serves me.) */
1299 # define TIMER_WINDOWS
1300 #else /* not WINDOWS */
1301 # ifdef HAVE_GETTIMEOFDAY
1302 # define TIMER_GETTIMEOFDAY
1306 #endif /* not WINDOWS */
1308 #ifdef TIMER_GETTIMEOFDAY
1309 typedef struct timeval wget_sys_time;
1313 typedef time_t wget_sys_time;
1316 #ifdef TIMER_WINDOWS
1317 typedef ULARGE_INTEGER wget_sys_time;
1321 /* Whether the start time has been initialized. */
1324 /* The starting point in time which, subtracted from the current
1325 time, yields elapsed time. */
1326 wget_sys_time start;
1328 /* The most recent elapsed time, calculated by wtimer_elapsed().
1329 Measured in milliseconds. */
1330 double elapsed_last;
1332 /* Approximately, the time elapsed between the true start of the
1333 measurement and the time represented by START. */
1334 double elapsed_pre_start;
1337 /* Allocate a timer. Calling wtimer_read on the timer will return
1338 zero. It is not legal to call wtimer_update with a freshly
1339 allocated timer -- use wtimer_reset first. */
1342 wtimer_allocate (void)
1344 struct wget_timer *wt = xnew (struct wget_timer);
1349 /* Allocate a new timer and reset it. Return the new timer. */
1354 struct wget_timer *wt = wtimer_allocate ();
1359 /* Free the resources associated with the timer. Its further use is
1363 wtimer_delete (struct wget_timer *wt)
1368 /* Store system time to WST. */
1371 wtimer_sys_set (wget_sys_time *wst)
1373 #ifdef TIMER_GETTIMEOFDAY
1374 gettimeofday (wst, NULL);
1381 #ifdef TIMER_WINDOWS
1382 /* We use GetSystemTime to get the elapsed time. MSDN warns that
1383 system clock adjustments can skew the output of GetSystemTime
1384 when used as a timer and gives preference to GetTickCount and
1385 high-resolution timers. But GetTickCount can overflow, and hires
1386 timers are typically used for profiling, not for regular time
1387 measurement. Since we handle clock skew anyway, we just use
1391 GetSystemTime (&st);
1393 /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1394 FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1395 arithmetic on that. */
1396 SystemTimeToFileTime (&st, &ft);
1397 wst->HighPart = ft.dwHighDateTime;
1398 wst->LowPart = ft.dwLowDateTime;
1402 /* Reset timer WT. This establishes the starting point from which
1403 wtimer_elapsed() will return the number of elapsed milliseconds.
1404 It is allowed to reset a previously used timer.
1406 If a non-zero value is used as START, the timer's values will be
1410 wtimer_reset (struct wget_timer *wt)
1412 /* Set the start time to the current time. */
1413 wtimer_sys_set (&wt->start);
1414 wt->elapsed_last = 0;
1415 wt->elapsed_pre_start = 0;
1416 wt->initialized = 1;
1420 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1422 #ifdef TIMER_GETTIMEOFDAY
1423 return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1424 + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1428 return 1000 * (*wst1 - *wst2);
1432 /* VC++ 6 doesn't support direct cast of uint64 to double. To work
1433 around this, we subtract, then convert to signed, then finally to
1435 return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1439 /* Update the timer's elapsed interval. This function causes the
1440 timer to call gettimeofday (or time(), etc.) to update its idea of
1441 current time. To get the elapsed interval in milliseconds, use
1444 This function handles clock skew, i.e. time that moves backwards is
1448 wtimer_update (struct wget_timer *wt)
1453 assert (wt->initialized != 0);
1455 wtimer_sys_set (&now);
1456 elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1458 /* Ideally we'd just return the difference between NOW and
1459 wt->start. However, the system timer can be set back, and we
1460 could return a value smaller than when we were last called, even
1461 a negative value. Both of these would confuse the callers, which
1462 expect us to return monotonically nondecreasing values.
1464 Therefore: if ELAPSED is smaller than its previous known value,
1465 we reset wt->start to the current time and effectively start
1466 measuring from this point. But since we don't want the elapsed
1467 value to start from zero, we set elapsed_pre_start to the last
1468 elapsed time and increment all future calculations by that
1471 if (elapsed < wt->elapsed_last)
1474 wt->elapsed_pre_start = wt->elapsed_last;
1475 elapsed = wt->elapsed_last;
1478 wt->elapsed_last = elapsed;
1481 /* Return the elapsed time in milliseconds between the last call to
1482 wtimer_reset and the last call to wtimer_update.
1484 A typical use of the timer interface would be:
1486 struct wget_timer *timer = wtimer_new ();
1487 ... do something that takes a while ...
1489 double msecs = wtimer_read (timer); */
1492 wtimer_read (const struct wget_timer *wt)
1494 return wt->elapsed_last;
1497 /* Return the assessed granularity of the timer implementation, in
1498 milliseconds. This is used by code that tries to substitute a
1499 better value for timers that have returned zero. */
1502 wtimer_granularity (void)
1504 #ifdef TIMER_GETTIMEOFDAY
1505 /* Granularity of gettimeofday varies wildly between architectures.
1506 However, it appears that on modern machines it tends to be better
1507 than 1ms. Assume 100 usecs. (Perhaps the configure process
1508 could actually measure this?) */
1516 #ifdef TIMER_WINDOWS
1517 /* According to MSDN, GetSystemTime returns a broken-down time
1518 structure whose smallest member is milliseconds. */
1523 /* This should probably be at a better place, but it doesn't really
1524 fit into html-parse.c. */
1526 /* The function returns the pointer to the malloc-ed quoted version of
1527 string s. It will recognize and quote numeric and special graphic
1528 entities, as per RFC1866:
1536 No other entities are recognized or replaced. */
1538 html_quote_string (const char *s)
1544 /* Pass through the string, and count the new size. */
1545 for (i = 0; *s; s++, i++)
1548 i += 4; /* `amp;' */
1549 else if (*s == '<' || *s == '>')
1550 i += 3; /* `lt;' and `gt;' */
1551 else if (*s == '\"')
1552 i += 5; /* `quot;' */
1556 res = (char *)xmalloc (i + 1);
1558 for (p = res; *s; s++)
1571 *p++ = (*s == '<' ? 'l' : 'g');
1598 /* Determine the width of the terminal we're running on. If that's
1599 not possible, return 0. */
1602 determine_screen_width (void)
1604 /* If there's a way to get the terminal size using POSIX
1605 tcgetattr(), somebody please tell me. */
1608 #else /* TIOCGWINSZ */
1612 if (opt.lfilename != NULL)
1615 fd = fileno (stderr);
1616 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1617 return 0; /* most likely ENOTTY */
1620 #endif /* TIOCGWINSZ */
1623 /* Return a random number between 0 and MAX-1, inclusive.
1625 If MAX is greater than the value of RAND_MAX+1 on the system, the
1626 returned value will be in the range [0, RAND_MAX]. This may be
1627 fixed in a future release.
1629 The random number generator is seeded automatically the first time
1632 This uses rand() for portability. It has been suggested that
1633 random() offers better randomness, but this is not required for
1634 Wget, so I chose to go for simplicity and use rand
1637 DO NOT use this for cryptographic purposes. It is only meant to be
1638 used in situations where quality of the random numbers returned
1639 doesn't really matter. */
1642 random_number (int max)
1650 srand (time (NULL));
1655 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1656 and enforce that assumption by masking other bits. */
1658 # define RAND_MAX 32767
1662 /* This is equivalent to rand() % max, but uses the high-order bits
1663 for better randomness on architectures where rand() is implemented
1664 using a simple congruential generator. */
1666 bounded = (double)max * rnd / (RAND_MAX + 1.0);
1667 return (int)bounded;
1670 /* Return a random uniformly distributed floating point number in the
1671 [0, 1) range. The precision of returned numbers is 9 digits.
1673 Modify this to use erand48() where available! */
1678 /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1679 sure it's greater than 1000. */
1680 int rnd1 = random_number (1000);
1681 int rnd2 = random_number (1000);
1682 int rnd3 = random_number (1000);
1683 return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1687 /* A debugging function for checking whether an MD5 library works. */
1689 #include "gen-md5.h"
1692 debug_test_md5 (char *buf)
1694 unsigned char raw[16];
1695 static char res[33];
1699 ALLOCA_MD5_CONTEXT (ctx);
1702 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1703 gen_md5_finish (ctx, raw);
1710 *p2++ = XNUM_TO_digit (*p1 >> 4);
1711 *p2++ = XNUM_TO_digit (*p1 & 0xf);
1720 /* Implementation of run_with_timeout, a generic timeout-forcing
1721 routine for systems with Unix-like signal handling. */
1723 #ifdef USE_SIGNAL_TIMEOUT
1724 # ifdef HAVE_SIGSETJMP
1725 # define SETJMP(env) sigsetjmp (env, 1)
1727 static sigjmp_buf run_with_timeout_env;
1730 abort_run_with_timeout (int sig)
1732 assert (sig == SIGALRM);
1733 siglongjmp (run_with_timeout_env, -1);
1735 # else /* not HAVE_SIGSETJMP */
1736 # define SETJMP(env) setjmp (env)
1738 static jmp_buf run_with_timeout_env;
1741 abort_run_with_timeout (int sig)
1743 assert (sig == SIGALRM);
1744 /* We don't have siglongjmp to preserve the set of blocked signals;
1745 if we longjumped out of the handler at this point, SIGALRM would
1746 remain blocked. We must unblock it manually. */
1747 int mask = siggetmask ();
1748 mask &= ~sigmask (SIGALRM);
1751 /* Now it's safe to longjump. */
1752 longjmp (run_with_timeout_env, -1);
1754 # endif /* not HAVE_SIGSETJMP */
1756 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses
1757 setitimer where available, alarm otherwise.
1759 TIMEOUT should be non-zero. If the timeout value is so small that
1760 it would be rounded to zero, it is rounded to the least legal value
1761 instead (1us for setitimer, 1s for alarm). That ensures that
1762 SIGALRM will be delivered in all cases. */
1765 alarm_set (double timeout)
1768 /* Use the modern itimer interface. */
1769 struct itimerval itv;
1771 itv.it_value.tv_sec = (long) timeout;
1772 itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout);
1773 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1774 /* Ensure that we wait for at least the minimum interval.
1775 Specifying zero would mean "wait forever". */
1776 itv.it_value.tv_usec = 1;
1777 setitimer (ITIMER_REAL, &itv, NULL);
1778 #else /* not ITIMER_REAL */
1779 /* Use the old alarm() interface. */
1780 int secs = (int) timeout;
1782 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is
1783 because alarm(0) means "never deliver the alarm", i.e. "wait
1784 forever", which is not what someone who specifies a 0.5s
1785 timeout would expect. */
1788 #endif /* not ITIMER_REAL */
1791 /* Cancel the alarm set with alarm_set. */
1797 struct itimerval disable;
1799 setitimer (ITIMER_REAL, &disable, NULL);
1800 #else /* not ITIMER_REAL */
1802 #endif /* not ITIMER_REAL */
1805 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1806 seconds. Returns non-zero if the function was interrupted with a
1807 timeout, zero otherwise.
1809 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1810 using setitimer() or alarm(). The timeout is enforced by
1811 longjumping out of the SIGALRM handler. This has several
1812 advantages compared to the traditional approach of relying on
1813 signals causing system calls to exit with EINTR:
1815 * The callback function is *forcibly* interrupted after the
1816 timeout expires, (almost) regardless of what it was doing and
1817 whether it was in a syscall. For example, a calculation that
1818 takes a long time is interrupted as reliably as an IO
1821 * It works with both SYSV and BSD signals because it doesn't
1822 depend on the default setting of SA_RESTART.
1824 * It doesn't require special handler setup beyond a simple call to
1825 signal(). (It does use sigsetjmp/siglongjmp, but they're
1828 The only downside is that, if FUN allocates internal resources that
1829 are normally freed prior to exit from the function, they will be
1830 lost in case of timeout. */
1833 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1843 signal (SIGALRM, abort_run_with_timeout);
1844 if (SETJMP (run_with_timeout_env) != 0)
1846 /* Longjumped out of FUN with a timeout. */
1847 signal (SIGALRM, SIG_DFL);
1850 alarm_set (timeout);
1853 /* Preserve errno in case alarm() or signal() modifies it. */
1854 saved_errno = errno;
1856 signal (SIGALRM, SIG_DFL);
1857 errno = saved_errno;
1862 #else /* not USE_SIGNAL_TIMEOUT */
1865 /* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1866 define it under Windows, because Windows has its own version of
1867 run_with_timeout that uses threads. */
1870 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1875 #endif /* not WINDOWS */
1876 #endif /* not USE_SIGNAL_TIMEOUT */
1880 /* Sleep for the specified number of seconds. On machines without
1881 nanosleep(), this may sleep for a shorter time if interrupted by signals. */
1884 xsleep (double seconds)
1886 #ifdef HAVE_NANOSLEEP
1887 /* nanosleep is the preferred interface because it offers high
1888 accuracy and, more importantly, because it allows us to reliably
1889 restart after having been interrupted by a signal such as
1891 struct timespec sleep, remaining;
1892 sleep.tv_sec = (long) seconds;
1893 sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);
1894 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1895 /* If nanosleep has been interrupted by a signal, adjust the
1896 sleeping period and return to sleep. */
1898 #else /* not HAVE_NANOSLEEP */
1900 /* If usleep is available, use it in preference to select. */
1903 /* usleep apparently accepts unsigned long, which means it can't
1904 sleep longer than ~70 min (35min if signed). If the period
1905 is larger than what usleep can safely handle, use sleep
1906 first, then add usleep for subsecond accuracy. */
1908 seconds -= (long) seconds;
1910 usleep (seconds * 1000000L);
1911 #else /* not HAVE_USLEEP */
1913 struct timeval sleep;
1914 sleep.tv_sec = (long) seconds;
1915 sleep.tv_usec = 1000000L * (seconds - (long) seconds);
1916 select (0, NULL, NULL, NULL, &sleep);
1917 /* If select returns -1 and errno is EINTR, it means we were
1918 interrupted by a signal. But without knowing how long we've
1919 actually slept, we can't return to sleep. Using gettimeofday to
1920 track sleeps is slow and unreliable due to clock skew. */
1921 #else /* not HAVE_SELECT */
1923 #endif /* not HAVE_SELECT */
1924 #endif /* not HAVE_USLEEP */
1925 #endif /* not HAVE_NANOSLEEP */
1928 #endif /* not WINDOWS */