1 /* Various utility functions.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
36 #else /* not HAVE_STRING_H */
38 #endif /* not HAVE_STRING_H */
39 #include <sys/types.h>
44 # include <sys/mman.h>
53 #ifdef HAVE_SYS_UTIME_H
54 # include <sys/utime.h>
58 # include <libc.h> /* for access() */
63 /* For TIOCGWINSZ and friends: */
64 #ifdef HAVE_SYS_IOCTL_H
65 # include <sys/ioctl.h>
71 /* Needed for run_with_timeout. */
72 #undef USE_SIGNAL_TIMEOUT
80 #ifndef HAVE_SIGSETJMP
81 /* If sigsetjmp is a macro, configure won't pick it up. */
83 # define HAVE_SIGSETJMP
88 # ifdef HAVE_SIGSETJMP
89 # define USE_SIGNAL_TIMEOUT
92 # define USE_SIGNAL_TIMEOUT
104 /* Utility function: like xstrdup(), but also lowercases S. */
107 xstrdup_lower (const char *s)
109 char *copy = xstrdup (s);
116 /* Return a count of how many times CHR occurs in STRING. */
119 count_char (const char *string, char chr)
123 for (p = string; *p; p++)
129 /* Copy the string formed by two pointers (one on the beginning, other
130 on the char after the last char) to a new, malloc-ed location.
133 strdupdelim (const char *beg, const char *end)
135 char *res = (char *)xmalloc (end - beg + 1);
136 memcpy (res, beg, end - beg);
137 res[end - beg] = '\0';
141 /* Parse a string containing comma-separated elements, and return a
142 vector of char pointers with the elements. Spaces following the
143 commas are ignored. */
145 sepstring (const char *s)
159 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
160 res[i] = strdupdelim (p, s);
163 /* Skip the blanks following the ','. */
171 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
172 res[i] = strdupdelim (p, s);
177 /* Return pointer to a static char[] buffer in which zero-terminated
178 string-representation of TM (in form hh:mm:ss) is printed.
180 If TM is NULL, the current time will be used. */
183 time_str (time_t *tm)
185 static char output[15];
187 time_t secs = tm ? *tm : time (NULL);
191 /* In case of error, return the empty string. Maybe we should
192 just abort if this happens? */
196 ptm = localtime (&secs);
197 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
201 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
204 datetime_str (time_t *tm)
206 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
208 time_t secs = tm ? *tm : time (NULL);
212 /* In case of error, return the empty string. Maybe we should
213 just abort if this happens? */
217 ptm = localtime (&secs);
218 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
219 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
220 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
224 /* The Windows versions of the following two functions are defined in
229 fork_to_background (void)
232 /* Whether we arrange our own version of opt.lfilename here. */
237 opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
249 /* parent, no error */
250 printf (_("Continuing in background, pid %d.\n"), (int)pid);
252 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
253 exit (0); /* #### should we use _exit()? */
256 /* child: give up the privileges and keep running. */
258 freopen ("/dev/null", "r", stdin);
259 freopen ("/dev/null", "w", stdout);
260 freopen ("/dev/null", "w", stderr);
262 #endif /* not WINDOWS */
264 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
265 specified with TM. */
267 touch (const char *file, time_t tm)
269 #ifdef HAVE_STRUCT_UTIMBUF
270 struct utimbuf times;
271 times.actime = times.modtime = tm;
274 times[0] = times[1] = tm;
277 if (utime (file, &times) == -1)
278 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
281 /* Checks if FILE is a symbolic link, and removes it if it is. Does
282 nothing under MS-Windows. */
284 remove_link (const char *file)
289 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
291 DEBUGP (("Unlinking %s (symlink).\n", file));
294 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
295 file, strerror (errno));
300 /* Does FILENAME exist? This is quite a lousy implementation, since
301 it supplies no error codes -- only a yes-or-no answer. Thus it
302 will return that a file does not exist if, e.g., the directory is
303 unreadable. I don't mind it too much currently, though. The
304 proper way should, of course, be to have a third, error state,
305 other than true/false, but that would introduce uncalled-for
306 additional complexity to the callers. */
308 file_exists_p (const char *filename)
311 return access (filename, F_OK) >= 0;
314 return stat (filename, &buf) >= 0;
318 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
319 Returns 0 on error. */
321 file_non_directory_p (const char *path)
324 /* Use lstat() rather than stat() so that symbolic links pointing to
325 directories can be identified correctly. */
326 if (lstat (path, &buf) != 0)
328 return S_ISDIR (buf.st_mode) ? 0 : 1;
331 /* Return the size of file named by FILENAME, or -1 if it cannot be
332 opened or seeked into. */
334 file_size (const char *filename)
337 /* We use fseek rather than stat to determine the file size because
338 that way we can also verify whether the file is readable.
339 Inspired by the POST patch by Arnaud Wylie. */
340 FILE *fp = fopen (filename, "rb");
343 fseek (fp, 0, SEEK_END);
349 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
350 doesn't exist is found. Return a freshly allocated copy of the
354 unique_name_1 (const char *prefix)
357 int plen = strlen (prefix);
358 char *template = (char *)alloca (plen + 1 + 24);
359 char *template_tail = template + plen;
361 memcpy (template, prefix, plen);
362 *template_tail++ = '.';
365 number_to_string (template_tail, count++);
366 while (file_exists_p (template));
368 return xstrdup (template);
371 /* Return a unique file name, based on FILE.
373 More precisely, if FILE doesn't exist, it is returned unmodified.
374 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
375 file name that doesn't exist is returned.
377 The resulting file is not created, only verified that it didn't
378 exist at the point in time when the function was called.
379 Therefore, where security matters, don't rely that the file created
380 by this function exists until you open it with O_EXCL or
383 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
384 string. Otherwise, it may return FILE if the file doesn't exist
385 (and therefore doesn't need changing). */
388 unique_name (const char *file, int allow_passthrough)
390 /* If the FILE itself doesn't exist, return it without
392 if (!file_exists_p (file))
393 return allow_passthrough ? (char *)file : xstrdup (file);
395 /* Otherwise, find a numeric suffix that results in unused file name
397 return unique_name_1 (file);
400 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
401 are missing, create them first. In case any mkdir() call fails,
402 return its error status. Returns 0 on successful completion.
404 The behaviour of this function should be identical to the behaviour
405 of `mkdir -p' on systems where mkdir supports the `-p' option. */
407 make_directory (const char *directory)
414 /* Make a copy of dir, to be able to write to it. Otherwise, the
415 function is unsafe if called with a read-only char *argument. */
416 STRDUP_ALLOCA (dir, directory);
418 /* If the first character of dir is '/', skip it (and thus enable
419 creation of absolute-pathname directories). */
420 for (i = (*dir == '/'); 1; ++i)
422 for (; dir[i] && dir[i] != '/'; i++)
427 /* Check whether the directory already exists. Allow creation of
428 intermediate directories to fail, as the initial path components
429 are not necessarily directories! */
430 if (!file_exists_p (dir))
431 ret = mkdir (dir, 0777);
442 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
443 should be a file name.
445 file_merge("/foo/bar", "baz") => "/foo/baz"
446 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
447 file_merge("foo", "bar") => "bar"
449 In other words, it's a simpler and gentler version of uri_merge_1. */
452 file_merge (const char *base, const char *file)
455 const char *cut = (const char *)strrchr (base, '/');
458 return xstrdup (file);
460 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
461 memcpy (result, base, cut - base);
462 result[cut - base] = '/';
463 strcpy (result + (cut - base) + 1, file);
468 static int in_acclist PARAMS ((const char *const *, const char *, int));
470 /* Determine whether a file is acceptable to be followed, according to
471 lists of patterns to accept/reject. */
473 acceptable (const char *s)
477 while (l && s[l] != '/')
484 return (in_acclist ((const char *const *)opt.accepts, s, 1)
485 && !in_acclist ((const char *const *)opt.rejects, s, 1));
487 return in_acclist ((const char *const *)opt.accepts, s, 1);
489 else if (opt.rejects)
490 return !in_acclist ((const char *const *)opt.rejects, s, 1);
494 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
495 `/something', frontcmp() will return 1 only if S2 begins with
496 `/something'. Otherwise, 0 is returned. */
498 frontcmp (const char *s1, const char *s2)
500 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
504 /* Iterate through STRLIST, and return the first element that matches
505 S, through wildcards or front comparison (as appropriate). */
507 proclist (char **strlist, const char *s, enum accd flags)
511 for (x = strlist; *x; x++)
512 if (has_wildcards_p (*x))
514 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
519 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
526 /* Returns whether DIRECTORY is acceptable for download, wrt the
527 include/exclude lists.
529 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
530 and absolute paths may be freely intermixed. */
532 accdir (const char *directory, enum accd flags)
534 /* Remove starting '/'. */
535 if (flags & ALLABS && *directory == '/')
539 if (!proclist (opt.includes, directory, flags))
544 if (proclist (opt.excludes, directory, flags))
550 /* Return non-zero if STRING ends with TAIL. For instance:
552 match_tail ("abc", "bc", 0) -> 1
553 match_tail ("abc", "ab", 0) -> 0
554 match_tail ("abc", "abc", 0) -> 1
556 If FOLD_CASE_P is non-zero, the comparison will be
560 match_tail (const char *string, const char *tail, int fold_case_p)
564 /* We want this to be fast, so we code two loops, one with
565 case-folding, one without. */
569 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
570 if (string[i] != tail[j])
575 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
576 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
580 /* If the tail was exhausted, the match was successful. */
587 /* Checks whether string S matches each element of ACCEPTS. A list
588 element is matched either with fnmatch() or match_tail(),
589 according to whether the element contains wildcards or not.
591 If the BACKWARD is 0, don't do backward comparison -- just compare
594 in_acclist (const char *const *accepts, const char *s, int backward)
596 for (; *accepts; accepts++)
598 if (has_wildcards_p (*accepts))
600 /* fnmatch returns 0 if the pattern *does* match the
602 if (fnmatch (*accepts, s, 0) == 0)
609 if (match_tail (s, *accepts, 0))
614 if (!strcmp (s, *accepts))
622 /* Return the location of STR's suffix (file extension). Examples:
623 suffix ("foo.bar") -> "bar"
624 suffix ("foo.bar.baz") -> "baz"
625 suffix ("/foo/bar") -> NULL
626 suffix ("/foo.bar/baz") -> NULL */
628 suffix (const char *str)
632 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
636 return (char *)str + i;
641 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
645 has_wildcards_p (const char *s)
648 if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
653 /* Return non-zero if FNAME ends with a typical HTML suffix. The
654 following (case-insensitive) suffixes are presumed to be HTML files:
658 ?html (`?' matches one character)
660 #### CAVEAT. This is not necessarily a good indication that FNAME
661 refers to a file that contains HTML! */
663 has_html_suffix_p (const char *fname)
667 if ((suf = suffix (fname)) == NULL)
669 if (!strcasecmp (suf, "html"))
671 if (!strcasecmp (suf, "htm"))
673 if (suf[0] && !strcasecmp (suf + 1, "html"))
678 /* Read a line from FP and return the pointer to freshly allocated
679 storage. The storage space is obtained through malloc() and should
680 be freed with free() when it is no longer needed.
682 The length of the line is not limited, except by available memory.
683 The newline character at the end of line is retained. The line is
684 terminated with a zero character.
686 After end-of-file is encountered without anything being read, NULL
687 is returned. NULL is also returned on error. To distinguish
688 between these two cases, use the stdio function ferror(). */
691 read_whole_line (FILE *fp)
695 char *line = (char *)xmalloc (bufsize);
697 while (fgets (line + length, bufsize - length, fp))
699 length += strlen (line + length);
701 /* Possible for example when reading from a binary file where
702 a line begins with \0. */
705 if (line[length - 1] == '\n')
708 /* fgets() guarantees to read the whole line, or to use up the
709 space we've given it. We can double the buffer
712 line = xrealloc (line, bufsize);
714 if (length == 0 || ferror (fp))
719 if (length + 1 < bufsize)
720 /* Relieve the memory from our exponential greediness. We say
721 `length + 1' because the terminating \0 is not included in
722 LENGTH. We don't need to zero-terminate the string ourselves,
723 though, because fgets() does that. */
724 line = xrealloc (line, length + 1);
728 /* Read FILE into memory. A pointer to `struct file_memory' is
729 returned; use struct element `content' to access file contents, and
730 the element `length' to know the file length. `content' is *not*
731 zero-terminated, and you should *not* read or write beyond the [0,
732 length) range of characters.
734 After you are done with the file contents, call read_file_free to
737 Depending on the operating system and the type of file that is
738 being read, read_file() either mmap's the file into memory, or
739 reads the file into the core using read().
741 If file is named "-", fileno(stdin) is used for reading instead.
742 If you want to read from a real file named "-", use "./-" instead. */
745 read_file (const char *file)
748 struct file_memory *fm;
750 int inhibit_close = 0;
752 /* Some magic in the finest tradition of Perl and its kin: if FILE
753 is "-", just use stdin. */
758 /* Note that we don't inhibit mmap() in this case. If stdin is
759 redirected from a regular file, mmap() will still work. */
762 fd = open (file, O_RDONLY);
765 fm = xnew (struct file_memory);
770 if (fstat (fd, &buf) < 0)
772 fm->length = buf.st_size;
773 /* NOTE: As far as I know, the callers of this function never
774 modify the file text. Relying on this would enable us to
775 specify PROT_READ and MAP_SHARED for a marginal gain in
776 efficiency, but at some cost to generality. */
777 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
779 if (fm->content == (char *)MAP_FAILED)
789 /* The most common reason why mmap() fails is that FD does not point
790 to a plain file. However, it's also possible that mmap() doesn't
791 work for a particular type of file. Therefore, whenever mmap()
792 fails, we just fall back to the regular method. */
793 #endif /* HAVE_MMAP */
796 size = 512; /* number of bytes fm->content can
797 hold at any given time. */
798 fm->content = xmalloc (size);
802 if (fm->length > size / 2)
804 /* #### I'm not sure whether the whole exponential-growth
805 thing makes sense with kernel read. On Linux at least,
806 read() refuses to read more than 4K from a file at a
807 single chunk anyway. But other Unixes might optimize it
808 better, and it doesn't *hurt* anything, so I'm leaving
811 /* Normally, we grow SIZE exponentially to make the number
812 of calls to read() and realloc() logarithmic in relation
813 to file size. However, read() can read an amount of data
814 smaller than requested, and it would be unreasonable to
815 double SIZE every time *something* was read. Therefore,
816 we double SIZE only when the length exceeds half of the
817 entire allocated size. */
819 fm->content = xrealloc (fm->content, size);
821 nread = read (fd, fm->content + fm->length, size - fm->length);
823 /* Successful read. */
834 if (size > fm->length && fm->length != 0)
835 /* Due to exponential growth of fm->content, the allocated region
836 might be much larger than what is actually needed. */
837 fm->content = xrealloc (fm->content, fm->length);
849 /* Release the resources held by FM. Specifically, this calls
850 munmap() or xfree() on fm->content, depending on whether mmap or
851 malloc/read were used to read in the file. It also frees the
852 memory needed to hold the FM structure itself. */
855 read_file_free (struct file_memory *fm)
860 munmap (fm->content, fm->length);
870 /* Free the pointers in a NULL-terminated vector of pointers, then
871 free the pointer itself. */
873 free_vec (char **vec)
884 /* Append vector V2 to vector V1. The function frees V2 and
885 reallocates V1 (thus you may not use the contents of either
886 pointer after the call). If V1 is NULL, V2 is returned. */
888 merge_vecs (char **v1, char **v2)
898 /* To avoid j == 0 */
903 for (i = 0; v1[i]; i++);
905 for (j = 0; v2[j]; j++);
907 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
908 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
913 /* A set of simple-minded routines to store strings in a linked list.
914 This used to also be used for searching, but now we have hash
917 /* It's a shame that these simple things like linked lists and hash
918 tables (see hash.c) need to be implemented over and over again. It
919 would be nice to be able to use the routines from glib -- see
920 www.gtk.org for details. However, that would make Wget depend on
921 glib, and I want to avoid dependencies to external libraries for
922 reasons of convenience and portability (I suspect Wget is more
923 portable than anything ever written for Gnome). */
925 /* Append an element to the list. If the list has a huge number of
926 elements, this can get slow because it has to find the list's
927 ending. If you think you have to call slist_append in a loop,
928 think about calling slist_prepend() followed by slist_nreverse(). */
931 slist_append (slist *l, const char *s)
933 slist *newel = xnew (slist);
936 newel->string = xstrdup (s);
941 /* Find the last element. */
948 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
951 slist_prepend (slist *l, const char *s)
953 slist *newel = xnew (slist);
954 newel->string = xstrdup (s);
959 /* Destructively reverse L. */
962 slist_nreverse (slist *l)
967 slist *next = l->next;
975 /* Is there a specific entry in the list? */
977 slist_contains (slist *l, const char *s)
979 for (; l; l = l->next)
980 if (!strcmp (l->string, s))
985 /* Free the whole slist. */
987 slist_free (slist *l)
998 /* Sometimes it's useful to create "sets" of strings, i.e. special
999 hash tables where you want to store strings as keys and merely
1000 query for their existence. Here is a set of utility routines that
1001 makes that transparent. */
1004 string_set_add (struct hash_table *ht, const char *s)
1006 /* First check whether the set element already exists. If it does,
1007 do nothing so that we don't have to free() the old element and
1008 then strdup() a new one. */
1009 if (hash_table_contains (ht, s))
1012 /* We use "1" as value. It provides us a useful and clear arbitrary
1013 value, and it consumes no memory -- the pointers to the same
1014 string "1" will be shared by all the key-value pairs in all `set'
1016 hash_table_put (ht, xstrdup (s), "1");
1019 /* Synonym for hash_table_contains... */
1022 string_set_contains (struct hash_table *ht, const char *s)
1024 return hash_table_contains (ht, s);
1028 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1035 string_set_free (struct hash_table *ht)
1037 hash_table_map (ht, string_set_free_mapper, NULL);
1038 hash_table_destroy (ht);
1042 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1049 /* Another utility function: call free() on all keys and values of HT. */
1052 free_keys_and_values (struct hash_table *ht)
1054 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1058 /* Engine for legible and legible_large_int; add thousand separators
1059 to numbers printed in strings. */
1062 legible_1 (const char *repr)
1064 static char outbuf[48];
1069 /* Reset the pointers. */
1073 /* Ignore the sign for the purpose of adding thousand
1080 /* How many digits before the first separator? */
1081 mod = strlen (inptr) % 3;
1083 for (i = 0; i < mod; i++)
1084 *outptr++ = inptr[i];
1085 /* Now insert the rest of them, putting separator before every
1087 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1089 if (i % 3 == 0 && i1 != 0)
1091 *outptr++ = inptr[i1];
1093 /* Zero-terminate the string. */
1098 /* Legible -- return a static pointer to the legibly printed long. */
1104 /* Print the number into the buffer. */
1105 number_to_string (inbuf, l);
1106 return legible_1 (inbuf);
1109 /* Write a string representation of LARGE_INT NUMBER into the provided
1110 buffer. The buffer should be able to accept 24 characters,
1111 including the terminating zero.
1113 It would be dangerous to use sprintf, because the code wouldn't
1114 work on a machine with gcc-provided long long support, but without
1115 libc support for "%lld". However, such platforms will typically
1116 not have snprintf and will use our version, which does support
1117 "%lld" where long longs are available. */
1120 large_int_to_string (char *buffer, LARGE_INT number)
1122 snprintf (buffer, 24, LARGE_INT_FMT, number);
1125 /* The same as legible(), but works on LARGE_INT. */
1128 legible_large_int (LARGE_INT l)
1131 large_int_to_string (inbuf, l);
1132 return legible_1 (inbuf);
1135 /* Count the digits in a (long) integer. */
1137 numdigit (long number)
1145 while ((number /= 10) > 0)
1150 /* A half-assed implementation of INT_MAX on machines that don't
1151 bother to define one. */
1153 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
/* Digit-printing helpers.  None of these macros takes the number or
   the output position as an argument: they expand to code that uses
   the identifiers `n' (the value still to be printed) and `p' (the
   output cursor), which must be in scope at the point of use.

   ONE_DIGIT (FIGURE) stores the quotient n / FIGURE, converted to an
   ASCII digit, at *p and advances p; n is left unchanged.
   ONE_DIGIT_ADVANCE (FIGURE) does the same and then reduces n to
   n % FIGURE, discarding the digit that was just written.

   DIGITS_<k> (FIGURE) writes K digits, most significant first, by
   dividing FIGURE by 10 at each step; the invocations in this file
   pass FIGURE = 10^(k-1), e.g. DIGITS_3 (100).  From DIGITS_2 upward
   the expansion is a sequence of `;'-separated statements rather than
   a single expression, so an invocation has to sit inside a braced
   block.  */
1156 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1157 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1159 #define DIGITS_1(figure) ONE_DIGIT (figure)
1160 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1161 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1162 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1163 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1164 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1165 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1166 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1167 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1168 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1170 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1172 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1173 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1174 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1175 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1176 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1177 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1178 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1179 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1180 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1182 /* Print NUMBER to BUFFER in base 10. This should be completely
1183 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1185 The speedup may make a difference in programs that frequently
1186 convert numbers to strings. Some implementations of sprintf,
1187 particularly the one in GNU libc, have been known to be extremely
1188 slow compared to this function.
1190 Return the pointer to the location where the terminating zero was
1191 printed. (Equivalent to calling buffer+strlen(buffer) after the
1194 BUFFER should be big enough to accept as many bytes as you expect
1195 the number to take up. On machines with 64-bit longs the maximum
1196 needed size is 24 bytes. That includes the digits needed for the
1197 largest 64-bit number, the `-' sign in case it's negative, and the
1198 terminating '\0'. */
1201 number_to_string (char *buffer, long number)
1206 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1207 /* We are running in a strange or misconfigured environment. Let
1208 sprintf cope with it. */
1209 sprintf (buffer, "%ld", n);
1210 p += strlen (buffer);
1211 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1217 /* We cannot print a '-' and assign -n to n because -n would
1218 overflow. Let sprintf deal with this border case. */
1219 sprintf (buffer, "%ld", n);
1220 p += strlen (buffer);
1228 if (n < 10) { DIGITS_1 (1); }
1229 else if (n < 100) { DIGITS_2 (10); }
1230 else if (n < 1000) { DIGITS_3 (100); }
1231 else if (n < 10000) { DIGITS_4 (1000); }
1232 else if (n < 100000) { DIGITS_5 (10000); }
1233 else if (n < 1000000) { DIGITS_6 (100000); }
1234 else if (n < 10000000) { DIGITS_7 (1000000); }
1235 else if (n < 100000000) { DIGITS_8 (10000000); }
1236 else if (n < 1000000000) { DIGITS_9 (100000000); }
1237 #if SIZEOF_LONG == 4
1238 /* ``if (1)'' serves only to preserve editor indentation. */
1239 else if (1) { DIGITS_10 (1000000000); }
1240 #else /* SIZEOF_LONG != 4 */
1241 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1242 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1243 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1244 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1245 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1246 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1247 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1248 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1249 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1250 else { DIGITS_19 (1000000000000000000L); }
1251 #endif /* SIZEOF_LONG != 4 */
1254 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1260 #undef ONE_DIGIT_ADVANCE
1282 /* Support for timers. */
1284 #undef TIMER_WINDOWS
1285 #undef TIMER_GETTIMEOFDAY
1288 /* Depending on the OS and availability of gettimeofday(), one and
1289 only one of the above constants will be defined. Virtually all
1290 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1291 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1292 non-Windows systems without gettimeofday.
1294 #### Perhaps we should also support ftime(), which exists on old
1295 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1296 C, if memory serves me.) */
1299 # define TIMER_WINDOWS
1300 #else /* not WINDOWS */
1301 # ifdef HAVE_GETTIMEOFDAY
1302 # define TIMER_GETTIMEOFDAY
1306 #endif /* not WINDOWS */
1308 #ifdef TIMER_GETTIMEOFDAY
1309 typedef struct timeval wget_sys_time;
1313 typedef time_t wget_sys_time;
1316 #ifdef TIMER_WINDOWS
1317 typedef ULARGE_INTEGER wget_sys_time;
1321 /* The starting point in time which, subtracted from the current
1322 time, yields elapsed time. */
1323 wget_sys_time start;
1325 /* The most recent elapsed time, calculated by wtimer_elapsed().
1326 Measured in milliseconds. */
1327 double elapsed_last;
1329 /* Approximately, the time elapsed between the true start of the
1330 measurement and the time represented by START. */
1331 double elapsed_pre_start;
1334 /* Allocate a timer. It is not legal to do anything with a freshly
1335 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1338 wtimer_allocate (void)
1340 struct wget_timer *wt = xnew (struct wget_timer);
1344 /* Allocate a new timer and reset it. Return the new timer. */
1349 struct wget_timer *wt = wtimer_allocate ();
1354 /* Free the resources associated with the timer. Its further use is
1358 wtimer_delete (struct wget_timer *wt)
1363 /* Store system time to WST. */
1366 wtimer_sys_set (wget_sys_time *wst)
1368 #ifdef TIMER_GETTIMEOFDAY
1369 gettimeofday (wst, NULL);
1376 #ifdef TIMER_WINDOWS
1377 /* We use GetSystemTime to get the elapsed time. MSDN warns that
1378 system clock adjustments can skew the output of GetSystemTime
1379 when used as a timer and gives preference to GetTickCount and
1380 high-resolution timers. But GetTickCount can overflow, and hires
1381 timers are typically used for profiling, not for regular time
1382 measurement. Since we handle clock skew anyway, we just use
1386 GetSystemTime (&st);
1388 /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1389 FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1390 arithmetic on that. */
1391 SystemTimeToFileTime (&st, &ft);
1392 wst->HighPart = ft.dwHighDateTime;
1393 wst->LowPart = ft.dwLowDateTime;
1397 /* Reset timer WT. This establishes the starting point from which
1398 wtimer_elapsed() will return the number of elapsed
1399 milliseconds. It is allowed to reset a previously used timer. */
1402 wtimer_reset (struct wget_timer *wt)
1404 /* Set the start time to the current time. */
1405 wtimer_sys_set (&wt->start);
1406 wt->elapsed_last = 0;
1407 wt->elapsed_pre_start = 0;
1411 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1413 #ifdef TIMER_GETTIMEOFDAY
1414 return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1415 + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1419 return 1000 * (*wst1 - *wst2);
1423 /* VC++ 6 doesn't support direct cast of uint64 to double. To work
1424 around this, we subtract, then convert to signed, then finally to
1426 return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1430 /* Return the number of milliseconds elapsed since the timer was last
1431 reset. It is allowed to call this function more than once to get
1432 increasingly higher elapsed values. These timers handle clock
1436 wtimer_elapsed (struct wget_timer *wt)
/* Sample the current system time, then compute the raw elapsed
value: the offset carried in elapsed_pre_start plus the time that
has passed since wt->start. The remainder of the function makes
sure the value never drops below the one recorded in elapsed_last
and then records the new value there. */
1441 wtimer_sys_set (&now);
1442 elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1444 /* Ideally we'd just return the difference between NOW and
1445 wt->start. However, the system timer can be set back, and we
1446 could return a value smaller than when we were last called, even
1447 a negative value. Both of these would confuse the callers, which
1448 expect us to return monotonically nondecreasing values.
1450 Therefore: if ELAPSED is smaller than its previous known value,
1451 we reset wt->start to the current time and effectively start
1452 measuring from this point. But since we don't want the elapsed
1453 value to start from zero, we set elapsed_pre_start to the last
1454 elapsed time and increment all future calculations by that
1457 if (elapsed < wt->elapsed_last)
1460 wt->elapsed_pre_start = wt->elapsed_last;
1461 elapsed = wt->elapsed_last;
1464 wt->elapsed_last = elapsed;
1468 /* Return the assessed granularity of the timer implementation, in
1469 milliseconds. This is used by code that tries to substitute a
1470 better value for timers that have returned zero.

The value is selected at compile time, depending on which of the
TIMER_* macros is defined. */
1473 wtimer_granularity (void)
1475 #ifdef TIMER_GETTIMEOFDAY
1476 /* Granularity of gettimeofday varies wildly between architectures.
1477 However, it appears that on modern machines it tends to be better
1478 than 1ms. Assume 100 usecs. (Perhaps the configure process
1479 could actually measure this?) */
1487 #ifdef TIMER_WINDOWS
1488 /* According to MSDN, GetSystemTime returns a broken-down time
1489 structure the smallest member of which is milliseconds. */
1494 /* This should probably be at a better place, but it doesn't really
1495 fit into html-parse.c. */
1497 /* The function returns the pointer to the malloc-ed quoted version of
1498 string s. It will recognize and quote numeric and special graphic
1499 entities, as per RFC1866:
1507 No other entities are recognized or replaced.

The work is done in two passes over S: the first computes the size
of the quoted string, the second writes it into a buffer obtained
from xmalloc(). */
1509 html_quote_string (const char *s)
1515 /* Pass through the string, and count the new size. */
/* The loop increment counts one byte for every input character;
the additions below account for the extra bytes an entity needs
beyond the character it replaces. */
1516 for (i = 0; *s; s++, i++)
1519 i += 4; /* `amp;' */
1520 else if (*s == '<' || *s == '>')
1521 i += 3; /* `lt;' and `gt;' */
1522 else if (*s == '\"')
1523 i += 5; /* `quot;' */
/* Reserve the computed size plus one byte for the terminating NUL. */
1527 res = (char *)xmalloc (i + 1);
/* Second pass: copy S into RES through the write pointer P. */
1529 for (p = res; *s; s++)
/* First letter of the entity name: l for a less-than sign, g
otherwise. */
1542 *p++ = (*s == '<' ? 'l' : 'g');
1569 /* Determine the width of the terminal we're running on. If that's
1570 not possible, return 0.

The width is obtained with the TIOCGWINSZ ioctl; the function is
compiled differently depending on whether that request is
available. */
1573 determine_screen_width (void)
1575 /* If there's a way to get the terminal size using POSIX
1576 tcgetattr(), somebody please tell me. */
1579 #else /* TIOCGWINSZ */
/* NOTE(review): the action taken when a log file name is set is not
visible in this excerpt; presumably output is then not going to a
terminal and no width is reported -- confirm. */
1583 if (opt.lfilename != NULL)
/* Ask for the window size of whatever stderr is attached to. */
1586 fd = fileno (stderr);
1587 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1588 return 0; /* most likely ENOTTY */
1591 #endif /* TIOCGWINSZ */
1594 /* Return a random number between 0 and MAX-1, inclusive.
1596 If MAX is greater than the value of RAND_MAX+1 on the system, the
1597 returned value will be in the range [0, RAND_MAX]. This may be
1598 fixed in a future release.
1600 The random number generator is seeded automatically the first time
1603 This uses rand() for portability. It has been suggested that
1604 random() offers better randomness, but this is not required for
1605 Wget, so I chose to go for simplicity and use rand
1608 DO NOT use this for cryptographic purposes. It is only meant to be
1609 used in situations where quality of the random numbers returned
1610 doesn't really matter. */
1613 random_number (int max)
/* Seed the generator from the current time. */
1621 srand (time (NULL));
1626 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1627 and enforce that assumption by masking other bits. */
1629 # define RAND_MAX 32767
1633 /* This is equivalent to rand() % max, but uses the high-order bits
1634 for better randomness on architectures where rand() is implemented
1635 using a simple congruential generator. */
/* Scale RND by MAX / (RAND_MAX + 1) in floating point; assuming RND
is in [0, RAND_MAX], the quotient is below MAX. The cast in the
return statement then drops the fractional part. */
1637 bounded = (double)max * rnd / (RAND_MAX + 1.0);
1638 return (int)bounded;
1641 /* Return a random uniformly distributed floating point number in the
1642 [0, 1) range. The precision of returned numbers is 9 digits.
1644 Modify this to use erand48() where available! */
1649 /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1650 sure it's greater than 1000. */
1651 int rnd1 = random_number (1000);
1652 int rnd2 = random_number (1000);
1653 int rnd3 = random_number (1000);
/* Each draw lies in [0, 999] and supplies three decimal digits of
the result: RND1 the first three after the decimal point, RND2 the
next three and RND3 the last three. */
1654 return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1658 /* A debugging function for checking whether an MD5 library works.

BUF is a NUL-terminated string; its strlen() bytes are fed to the
gen_md5 routines and the resulting digest is converted to
hexadecimal digits. */
1660 #include "gen-md5.h"
1663 debug_test_md5 (char *buf)
/* RAW receives the 16-byte binary digest. RES has room for 32 hex
digits plus a terminating NUL; because it is static, a later call
presumably overwrites the text produced by an earlier one. */
1665 unsigned char raw[16];
1666 static char res[33];
1670 ALLOCA_MD5_CONTEXT (ctx);
1673 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1674 gen_md5_finish (ctx, raw);
/* Two hex digits per digest byte: the high nibble first, then the
low nibble. */
1681 *p2++ = XNUM_TO_digit (*p1 >> 4);
1682 *p2++ = XNUM_TO_digit (*p1 & 0xf);
1691 /* Implementation of run_with_timeout, a generic timeout-forcing
1692 routine for systems with Unix-like signal handling. */
1694 #ifdef USE_SIGNAL_TIMEOUT
1695 # ifdef HAVE_SIGSETJMP
/* SETJMP hides the difference between the two variants: sigsetjmp
with a non-zero second argument here, plain setjmp below. */
1696 # define SETJMP(env) sigsetjmp (env, 1)
/* Jump buffer filled in by run_with_timeout through SETJMP and used
by the SIGALRM handler below. */
1698 static sigjmp_buf run_with_timeout_env;
/* SIGALRM handler: jump back to the SETJMP call in run_with_timeout,
which then sees a non-zero return value. */
1701 abort_run_with_timeout (int sig)
1703 assert (sig == SIGALRM);
1704 siglongjmp (run_with_timeout_env, -1);
1706 # else /* not HAVE_SIGSETJMP */
1707 # define SETJMP(env) setjmp (env)
/* Same role as in the sigsetjmp variant, but a plain jmp_buf. */
1709 static jmp_buf run_with_timeout_env;
/* SIGALRM handler for systems without sigsetjmp: SIGALRM has to be
unblocked by hand before jumping back to run_with_timeout. */
1712 abort_run_with_timeout (int sig)
1714 assert (sig == SIGALRM);
1715 /* We don't have siglongjmp to preserve the set of blocked signals;
1716 if we longjumped out of the handler at this point, SIGALRM would
1717 remain blocked. We must unblock it manually. */
/* Fetch the current mask and clear the SIGALRM bit in it. */
1718 int mask = siggetmask ();
1719 mask &= ~sigmask (SIGALRM);
1722 /* Now it's safe to longjump. */
1723 longjmp (run_with_timeout_env, -1);
1725 # endif /* not HAVE_SIGSETJMP */
1727 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses
1728 setitimer where available, alarm otherwise.
1730 TIMEOUT should be non-zero. If the timeout value is so small that
1731 it would be rounded to zero, it is rounded to the least legal value
1732 instead (1us for setitimer, 1s for alarm). That ensures that
1733 SIGALRM will be delivered in all cases. */
1736 alarm_set (double timeout)
1739 /* Use the modern itimer interface. */
1740 struct itimerval itv;
/* Split TIMEOUT into whole seconds and the remaining fraction
expressed in microseconds. */
1742 itv.it_value.tv_sec = (long) timeout;
1743 itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout);
1744 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1745 /* Ensure that we wait for at least the minimum interval.
1746 Specifying zero would mean "wait forever". */
1747 itv.it_value.tv_usec = 1;
1748 setitimer (ITIMER_REAL, &itv, NULL);
1749 #else /* not ITIMER_REAL */
1750 /* Use the old alarm() interface. */
/* The cast drops the fractional part of TIMEOUT. */
1751 int secs = (int) timeout;
1753 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is
1754 because alarm(0) means "never deliver the alarm", i.e. "wait
1755 forever", which is not what someone who specifies a 0.5s
1756 timeout would expect. */
1759 #endif /* not ITIMER_REAL */
1762 /* Cancel the alarm set with alarm_set. */
1768 struct itimerval disable;
/* Install DISABLE as the ITIMER_REAL setting. NOTE(review): the
statement that fills in DISABLE is not visible in this excerpt;
presumably it zeroes the structure, which disarms the timer --
confirm. */
1770 setitimer (ITIMER_REAL, &disable, NULL);
1771 #else /* not ITIMER_REAL */
1773 #endif /* not ITIMER_REAL */
1776 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1777 seconds. Returns non-zero if the function was interrupted with a
1778 timeout, zero otherwise.
1780 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1781 using setitimer() or alarm(). The timeout is enforced by
1782 longjumping out of the SIGALRM handler. This has several
1783 advantages compared to the traditional approach of relying on
1784 signals causing system calls to exit with EINTR:
1786 * The callback function is *forcibly* interrupted after the
1787 timeout expires, (almost) regardless of what it was doing and
1788 whether it was in a syscall. For example, a calculation that
1789 takes a long time is interrupted as reliably as an IO
1792 * It works with both SYSV and BSD signals because it doesn't
1793 depend on the default setting of SA_RESTART.
1795 * It doesn't require special handler setup beyond a simple call to
1796 signal(). (It does use sigsetjmp/siglongjmp, but they're
1799 The only downside is that, if FUN allocates internal resources that
1800 are normally freed prior to exit from the function, they will be
1801 lost in case of timeout. */
1804 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
/* Install the handler and record the jump target before the alarm
is armed. SETJMP yields zero on this direct call and non-zero when
abort_run_with_timeout jumps back here. */
1814 signal (SIGALRM, abort_run_with_timeout);
1815 if (SETJMP (run_with_timeout_env) != 0)
1817 /* Longjumped out of FUN with a timeout. */
1818 signal (SIGALRM, SIG_DFL);
/* Arm the alarm; from here on SIGALRM interrupts the call. */
1821 alarm_set (timeout);
1824 /* Preserve errno in case alarm() or signal() modifies it. */
1825 saved_errno = errno;
/* Put the default SIGALRM disposition back before restoring errno. */
1827 signal (SIGALRM, SIG_DFL);
1828 errno = saved_errno;
1833 #else /* not USE_SIGNAL_TIMEOUT */
1836 /* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1837 define it under Windows, because Windows has its own version of
1838 run_with_timeout that uses threads.

The signature matches the signal-based version so callers need not
care which one was compiled in. NOTE(review): the body is not
visible in this excerpt; presumably TIMEOUT is ignored and the
function always reports that no timeout occurred -- confirm. */
1841 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1846 #endif /* not WINDOWS */
1847 #endif /* not USE_SIGNAL_TIMEOUT */
1851 /* Sleep the specified number of seconds. On machines without
1852 nanosleep(), this may sleep shorter if interrupted by signals.

SECONDS may carry a fractional part. Which interface performs the
sleep is decided at compile time by the preprocessor conditionals
below. */
1855 xsleep (double seconds)
1857 #ifdef HAVE_NANOSLEEP
1858 /* nanosleep is the preferred interface because it offers high
1859 accuracy and, more importantly, because it allows us to reliably
1860 restart after having been interrupted by a signal such as
1862 struct timespec sleep, remaining;
1863 sleep.tv_sec = (long) seconds;
1864 sleep.tv_nsec = 1000000000L * (seconds - (long) seconds);
1865 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1866 /* If nanosleep has been interrupted by a signal, adjust the
1867 sleeping period and return to sleep. */
1869 #else /* not HAVE_NANOSLEEP */
1871 /* If usleep is available, use it in preference to select. */
1874 /* usleep apparently accepts unsigned long, which means it can't
1875 sleep longer than ~70 min (35min if signed). If the period
1876 is larger than what usleep can safely handle, use sleep
1877 first, then add usleep for subsecond accuracy. */
/* Drop the whole seconds so that only the fraction remains; it is
converted to microseconds for usleep below. */
1879 seconds -= (long) seconds;
1881 usleep (seconds * 1000000L);
1882 #else /* not HAVE_USLEEP */
/* Split SECONDS into whole seconds and microseconds and hand the
result to select as its timeout; with no descriptor sets to watch,
select has nothing to report except that timeout. */
1884 struct timeval sleep;
1885 sleep.tv_sec = (long) seconds;
1886 sleep.tv_usec = 1000000L * (seconds - (long) seconds);
1887 select (0, NULL, NULL, NULL, &sleep);
1888 /* If select returns -1 and errno is EINTR, it means we were
1889 interrupted by a signal. But without knowing how long we've
1890 actually slept, we can't return to sleep. Using gettimeofday to
1891 track sleeps is slow and unreliable due to clock skew. */
1892 #else /* not HAVE_SELECT */
1894 #endif /* not HAVE_SELECT */
1895 #endif /* not HAVE_USLEEP */
1896 #endif /* not HAVE_NANOSLEEP */
1899 #endif /* not WINDOWS */