1 /* Various utility functions.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
36 #else /* not HAVE_STRING_H */
38 #endif /* not HAVE_STRING_H */
39 #include <sys/types.h>
44 # include <sys/mman.h>
53 #ifdef HAVE_SYS_UTIME_H
54 # include <sys/utime.h>
58 # include <libc.h> /* for access() */
63 /* For TIOCGWINSZ and friends: */
64 #ifdef HAVE_SYS_IOCTL_H
65 # include <sys/ioctl.h>
71 /* Needed for run_with_timeout. */
72 #undef USE_SIGNAL_TIMEOUT
80 #ifndef HAVE_SIGSETJMP
81 /* If sigsetjmp is a macro, configure won't pick it up. */
83 # define HAVE_SIGSETJMP
88 # ifdef HAVE_SIGSETJMP
89 # define USE_SIGNAL_TIMEOUT
92 # define USE_SIGNAL_TIMEOUT
104 /* Utility function: like xstrdup(), but also lowercases S. */
107 xstrdup_lower (const char *s)
109 char *copy = xstrdup (s);
116 /* Return a count of how many times CHR occurs in STRING. */
119 count_char (const char *string, char chr)
123 for (p = string; *p; p++)
129 /* Copy the string formed by two pointers (one on the beginning, other
130 on the char after the last char) to a new, malloc-ed location.
133 strdupdelim (const char *beg, const char *end)
135 char *res = (char *)xmalloc (end - beg + 1);
136 memcpy (res, beg, end - beg);
137 res[end - beg] = '\0';
141 /* Parse a string containing comma-separated elements, and return a
142 vector of char pointers with the elements. Spaces following the
143 commas are ignored. */
145 sepstring (const char *s)
159 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
160 res[i] = strdupdelim (p, s);
163 /* Skip the blanks following the ','. */
171 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
172 res[i] = strdupdelim (p, s);
177 /* Return pointer to a static char[] buffer in which zero-terminated
178 string-representation of TM (in form hh:mm:ss) is printed.
180 If TM is non-NULL, the current time-in-seconds will be stored
183 (#### This is misleading: one would expect TM would be used instead
184 of the current time in that case. This design was probably
185 influenced by the design of time(2), and should be changed at some
186 point. No callers use non-NULL TM anyway.) */
189 time_str (time_t *tm)
191 static char output[15];
193 time_t secs = time (tm);
197 /* In case of error, return the empty string. Maybe we should
198 just abort if this happens? */
202 ptm = localtime (&secs);
203 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
207 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
210 datetime_str (time_t *tm)
212 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
214 time_t secs = time (tm);
218 /* In case of error, return the empty string. Maybe we should
219 just abort if this happens? */
223 ptm = localtime (&secs);
224 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
225 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
226 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
230 /* The Windows versions of the following two functions are defined in
235 fork_to_background (void)
238 /* Whether we arrange our own version of opt.lfilename here. */
243 opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);
255 /* parent, no error */
256 printf (_("Continuing in background, pid %d.\n"), (int)pid);
258 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
259 exit (0); /* #### should we use _exit()? */
262 /* child: give up the privileges and keep running. */
264 freopen ("/dev/null", "r", stdin);
265 freopen ("/dev/null", "w", stdout);
266 freopen ("/dev/null", "w", stderr);
268 #endif /* not WINDOWS */
270 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
271 specified with TM. */
273 touch (const char *file, time_t tm)
275 #ifdef HAVE_STRUCT_UTIMBUF
276 struct utimbuf times;
277 times.actime = times.modtime = tm;
280 times[0] = times[1] = tm;
283 if (utime (file, &times) == -1)
284 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
287 /* Checks if FILE is a symbolic link, and removes it if it is. Does
288 nothing under MS-Windows. */
290 remove_link (const char *file)
295 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
297 DEBUGP (("Unlinking %s (symlink).\n", file));
300 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
301 file, strerror (errno));
306 /* Does FILENAME exist? This is quite a lousy implementation, since
307 it supplies no error codes -- only a yes-or-no answer. Thus it
308 will return that a file does not exist if, e.g., the directory is
309 unreadable. I don't mind it too much currently, though. The
310 proper way should, of course, be to have a third, error state,
311 other than true/false, but that would introduce uncalled-for
312 additional complexity to the callers. */
314 file_exists_p (const char *filename)
317 return access (filename, F_OK) >= 0;
320 return stat (filename, &buf) >= 0;
324 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
325 Returns 0 on error. */
327 file_non_directory_p (const char *path)
330 /* Use lstat() rather than stat() so that symbolic links pointing to
331 directories can be identified correctly. */
332 if (lstat (path, &buf) != 0)
334 return S_ISDIR (buf.st_mode) ? 0 : 1;
337 /* Return the size of file named by FILENAME, or -1 if it cannot be
338 opened or seeked into. */
340 file_size (const char *filename)
343 /* We use fseek rather than stat to determine the file size because
344 that way we can also verify whether the file is readable.
345 Inspired by the POST patch by Arnaud Wylie. */
346 FILE *fp = fopen (filename, "rb");
349 fseek (fp, 0, SEEK_END);
355 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
356 doesn't exist is found. Return a freshly allocated copy of the
360 unique_name_1 (const char *prefix)
363 int plen = strlen (prefix);
364 char *template = (char *)alloca (plen + 1 + 24);
365 char *template_tail = template + plen;
367 memcpy (template, prefix, plen);
368 *template_tail++ = '.';
371 number_to_string (template_tail, count++);
372 while (file_exists_p (template));
374 return xstrdup (template);
377 /* Return a unique file name, based on FILE.
379 More precisely, if FILE doesn't exist, it is returned unmodified.
380 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
381 file name that doesn't exist is returned.
383 The resulting file is not created, only verified that it didn't
384 exist at the point in time when the function was called.
385 Therefore, where security matters, don't rely that the file created
386 by this function exists until you open it with O_EXCL or
389 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
390 string. Otherwise, it may return FILE if the file doesn't exist
391 (and therefore doesn't need changing). */
394 unique_name (const char *file, int allow_passthrough)
396 /* If the FILE itself doesn't exist, return it without
398 if (!file_exists_p (file))
399 return allow_passthrough ? (char *)file : xstrdup (file);
401 /* Otherwise, find a numeric suffix that results in unused file name
403 return unique_name_1 (file);
406 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
407 are missing, create them first. In case any mkdir() call fails,
408 return its error status. Returns 0 on successful completion.
410 The behaviour of this function should be identical to the behaviour
411 of `mkdir -p' on systems where mkdir supports the `-p' option. */
413 make_directory (const char *directory)
420 /* Make a copy of dir, to be able to write to it. Otherwise, the
421 function is unsafe if called with a read-only char *argument. */
422 STRDUP_ALLOCA (dir, directory);
424 /* If the first character of dir is '/', skip it (and thus enable
425 creation of absolute-pathname directories). */
426 for (i = (*dir == '/'); 1; ++i)
428 for (; dir[i] && dir[i] != '/'; i++)
433 /* Check whether the directory already exists. Allow creation of
434 intermediate directories to fail, as the initial path components
435 are not necessarily directories! */
436 if (!file_exists_p (dir))
437 ret = mkdir (dir, 0777);
448 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
449 should be a file name.
451 file_merge("/foo/bar", "baz") => "/foo/baz"
452 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
453 file_merge("foo", "bar") => "bar"
455 In other words, it's a simpler and gentler version of uri_merge_1. */
458 file_merge (const char *base, const char *file)
461 const char *cut = (const char *)strrchr (base, '/');
464 return xstrdup (file);
466 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
467 memcpy (result, base, cut - base);
468 result[cut - base] = '/';
469 strcpy (result + (cut - base) + 1, file);
474 static int in_acclist PARAMS ((const char *const *, const char *, int));
476 /* Determine whether a file is acceptable to be followed, according to
477 lists of patterns to accept/reject. */
479 acceptable (const char *s)
483 while (l && s[l] != '/')
490 return (in_acclist ((const char *const *)opt.accepts, s, 1)
491 && !in_acclist ((const char *const *)opt.rejects, s, 1));
493 return in_acclist ((const char *const *)opt.accepts, s, 1);
495 else if (opt.rejects)
496 return !in_acclist ((const char *const *)opt.rejects, s, 1);
500 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
501 `/something', frontcmp() will return 1 only if S2 begins with
502 `/something'. Otherwise, 0 is returned. */
504 frontcmp (const char *s1, const char *s2)
506 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
510 /* Iterate through STRLIST, and return the first element that matches
511 S, through wildcards or front comparison (as appropriate). */
513 proclist (char **strlist, const char *s, enum accd flags)
517 for (x = strlist; *x; x++)
518 if (has_wildcards_p (*x))
520 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
525 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
532 /* Returns whether DIRECTORY is acceptable for download, wrt the
533 include/exclude lists.
535 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
536 and absolute paths may be freely intermixed. */
538 accdir (const char *directory, enum accd flags)
540 /* Remove starting '/'. */
541 if (flags & ALLABS && *directory == '/')
545 if (!proclist (opt.includes, directory, flags))
550 if (proclist (opt.excludes, directory, flags))
556 /* Return non-zero if STRING ends with TAIL. For instance:
558 match_tail ("abc", "bc", 0) -> 1
559 match_tail ("abc", "ab", 0) -> 0
560 match_tail ("abc", "abc", 0) -> 1
562 If FOLD_CASE_P is non-zero, the comparison will be
566 match_tail (const char *string, const char *tail, int fold_case_p)
570 /* We want this to be fast, so we code two loops, one with
571 case-folding, one without. */
575 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
576 if (string[i] != tail[j])
581 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
582 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
586 /* If the tail was exhausted, the match was successful. */
593 /* Checks whether string S matches each element of ACCEPTS. A list
594 element is matched either with fnmatch() or match_tail(),
595 according to whether the element contains wildcards or not.
597 If the BACKWARD is 0, don't do backward comparison -- just compare
600 in_acclist (const char *const *accepts, const char *s, int backward)
602 for (; *accepts; accepts++)
604 if (has_wildcards_p (*accepts))
606 /* fnmatch returns 0 if the pattern *does* match the
608 if (fnmatch (*accepts, s, 0) == 0)
615 if (match_tail (s, *accepts, 0))
620 if (!strcmp (s, *accepts))
628 /* Return the location of STR's suffix (file extension). Examples:
629 suffix ("foo.bar") -> "bar"
630 suffix ("foo.bar.baz") -> "baz"
631 suffix ("/foo/bar") -> NULL
632 suffix ("/foo.bar/baz") -> NULL */
634 suffix (const char *str)
638 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
642 return (char *)str + i;
647 /* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
651 has_wildcards_p (const char *s)
654 if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
659 /* Return non-zero if FNAME ends with a typical HTML suffix. The
660 following (case-insensitive) suffixes are presumed to be HTML files:
664 ?html (`?' matches one character)
666 #### CAVEAT. This is not necessarily a good indication that FNAME
667 refers to a file that contains HTML! */
669 has_html_suffix_p (const char *fname)
673 if ((suf = suffix (fname)) == NULL)
675 if (!strcasecmp (suf, "html"))
677 if (!strcasecmp (suf, "htm"))
679 if (suf[0] && !strcasecmp (suf + 1, "html"))
684 /* Read a line from FP and return the pointer to freshly allocated
685 storage. The storage space is obtained through malloc() and should
686 be freed with free() when it is no longer needed.
688 The length of the line is not limited, except by available memory.
689 The newline character at the end of line is retained. The line is
690 terminated with a zero character.
692 After end-of-file is encountered without anything being read, NULL
693 is returned. NULL is also returned on error. To distinguish
694 between these two cases, use the stdio function ferror(). */
697 read_whole_line (FILE *fp)
701 char *line = (char *)xmalloc (bufsize);
703 while (fgets (line + length, bufsize - length, fp))
705 length += strlen (line + length);
707 /* Possible for example when reading from a binary file where
708 a line begins with \0. */
711 if (line[length - 1] == '\n')
714 /* fgets() guarantees to read the whole line, or to use up the
715 space we've given it. We can double the buffer
718 line = xrealloc (line, bufsize);
720 if (length == 0 || ferror (fp))
725 if (length + 1 < bufsize)
726 /* Relieve the memory from our exponential greediness. We say
727 `length + 1' because the terminating \0 is not included in
728 LENGTH. We don't need to zero-terminate the string ourselves,
729 though, because fgets() does that. */
730 line = xrealloc (line, length + 1);
734 /* Read FILE into memory. A pointer to `struct file_memory' is
735 returned; use struct element `content' to access file contents, and
736 the element `length' to know the file length. `content' is *not*
737 zero-terminated, and you should *not* read or write beyond the [0,
738 length) range of characters.
740 After you are done with the file contents, call read_file_free to
743 Depending on the operating system and the type of file that is
744 being read, read_file() either mmap's the file into memory, or
745 reads the file into the core using read().
747 If file is named "-", fileno(stdin) is used for reading instead.
748 If you want to read from a real file named "-", use "./-" instead. */
751 read_file (const char *file)
754 struct file_memory *fm;
756 int inhibit_close = 0;
758 /* Some magic in the finest tradition of Perl and its kin: if FILE
759 is "-", just use stdin. */
764 /* Note that we don't inhibit mmap() in this case. If stdin is
765 redirected from a regular file, mmap() will still work. */
768 fd = open (file, O_RDONLY);
771 fm = xnew (struct file_memory);
776 if (fstat (fd, &buf) < 0)
778 fm->length = buf.st_size;
779 /* NOTE: As far as I know, the callers of this function never
780 modify the file text. Relying on this would enable us to
781 specify PROT_READ and MAP_SHARED for a marginal gain in
782 efficiency, but at some cost to generality. */
783 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
785 if (fm->content == (char *)MAP_FAILED)
795 /* The most common reason why mmap() fails is that FD does not point
796 to a plain file. However, it's also possible that mmap() doesn't
797 work for a particular type of file. Therefore, whenever mmap()
798 fails, we just fall back to the regular method. */
799 #endif /* HAVE_MMAP */
802 size = 512; /* number of bytes fm->contents can
803 hold at any given time. */
804 fm->content = xmalloc (size);
808 if (fm->length > size / 2)
810 /* #### I'm not sure whether the whole exponential-growth
811 thing makes sense with kernel read. On Linux at least,
812 read() refuses to read more than 4K from a file at a
813 single chunk anyway. But other Unixes might optimize it
814 better, and it doesn't *hurt* anything, so I'm leaving
817 /* Normally, we grow SIZE exponentially to make the number
818 of calls to read() and realloc() logarithmic in relation
819 to file size. However, read() can read an amount of data
820 smaller than requested, and it would be unreasonable to
821 double SIZE every time *something* was read. Therefore,
822 we double SIZE only when the length exceeds half of the
823 entire allocated size. */
825 fm->content = xrealloc (fm->content, size);
827 nread = read (fd, fm->content + fm->length, size - fm->length);
829 /* Successful read. */
840 if (size > fm->length && fm->length != 0)
841 /* Due to exponential growth of fm->content, the allocated region
842 might be much larger than what is actually needed. */
843 fm->content = xrealloc (fm->content, fm->length);
855 /* Release the resources held by FM. Specifically, this calls
856 munmap() or xfree() on fm->content, depending whether mmap or
857 malloc/read were used to read in the file. It also frees the
858 memory needed to hold the FM structure itself. */
861 read_file_free (struct file_memory *fm)
866 munmap (fm->content, fm->length);
876 /* Free the pointers in a NULL-terminated vector of pointers, then
877 free the pointer itself. */
879 free_vec (char **vec)
890 /* Append vector V2 to vector V1. The function frees V2 and
891 reallocates V1 (thus you may not use the contents of either
892 pointer after the call). If V1 is NULL, V2 is returned. */
894 merge_vecs (char **v1, char **v2)
904 /* To avoid j == 0 */
909 for (i = 0; v1[i]; i++);
911 for (j = 0; v2[j]; j++);
913 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
914 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
919 /* A set of simple-minded routines to store strings in a linked list.
920 This used to also be used for searching, but now we have hash
923 /* It's a shame that these simple things like linked lists and hash
924 tables (see hash.c) need to be implemented over and over again. It
925 would be nice to be able to use the routines from glib -- see
926 www.gtk.org for details. However, that would make Wget depend on
927 glib, and I want to avoid dependencies to external libraries for
928 reasons of convenience and portability (I suspect Wget is more
929 portable than anything ever written for Gnome). */
931 /* Append an element to the list. If the list has a huge number of
932 elements, this can get slow because it has to find the list's
933 ending. If you think you have to call slist_append in a loop,
934 think about calling slist_prepend() followed by slist_nreverse(). */
937 slist_append (slist *l, const char *s)
939 slist *newel = xnew (slist);
942 newel->string = xstrdup (s);
947 /* Find the last element. */
954 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
957 slist_prepend (slist *l, const char *s)
959 slist *newel = xnew (slist);
960 newel->string = xstrdup (s);
965 /* Destructively reverse L. */
968 slist_nreverse (slist *l)
973 slist *next = l->next;
981 /* Is there a specific entry in the list? */
983 slist_contains (slist *l, const char *s)
985 for (; l; l = l->next)
986 if (!strcmp (l->string, s))
991 /* Free the whole slist. */
993 slist_free (slist *l)
1004 /* Sometimes it's useful to create "sets" of strings, i.e. special
1005 hash tables where you want to store strings as keys and merely
1006 query for their existence. Here is a set of utility routines that
1007 makes that transparent. */
1010 string_set_add (struct hash_table *ht, const char *s)
1012 /* First check whether the set element already exists. If it does,
1013 do nothing so that we don't have to free() the old element and
1014 then strdup() a new one. */
1015 if (hash_table_contains (ht, s))
1018 /* We use "1" as value. It provides us a useful and clear arbitrary
1019 value, and it consumes no memory -- the pointers to the same
1020 string "1" will be shared by all the key-value pairs in all `set'
1022 hash_table_put (ht, xstrdup (s), "1");
1025 /* Synonym for hash_table_contains... */
1028 string_set_contains (struct hash_table *ht, const char *s)
1030 return hash_table_contains (ht, s);
1034 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1041 string_set_free (struct hash_table *ht)
1043 hash_table_map (ht, string_set_free_mapper, NULL);
1044 hash_table_destroy (ht);
1048 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1055 /* Another utility function: call free() on all keys and values of HT. */
1058 free_keys_and_values (struct hash_table *ht)
1060 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1064 /* Engine for legible and legible_large_int; add thousand separators
1065 to numbers printed in strings. */
1068 legible_1 (const char *repr)
1070 static char outbuf[48];
1075 /* Reset the pointers. */
1079 /* Ignore the sign for the purpose of adding thousand
1086 /* How many digits before the first separator? */
1087 mod = strlen (inptr) % 3;
1089 for (i = 0; i < mod; i++)
1090 *outptr++ = inptr[i];
1091 /* Now insert the rest of them, putting separator before every
1093 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1095 if (i % 3 == 0 && i1 != 0)
1097 *outptr++ = inptr[i1];
1099 /* Zero-terminate the string. */
1104 /* Legible -- return a static pointer to the legibly printed long. */
1110 /* Print the number into the buffer. */
1111 number_to_string (inbuf, l);
1112 return legible_1 (inbuf);
1115 /* Write a string representation of LARGE_INT NUMBER into the provided
1116 buffer. The buffer should be able to accept 24 characters,
1117 including the terminating zero.
1119 It would be dangerous to use sprintf, because the code wouldn't
1120 work on a machine with gcc-provided long long support, but without
1121 libc support for "%lld". However, such platforms will typically
1122 not have snprintf and will use our version, which does support
1123 "%lld" where long longs are available. */
1126 large_int_to_string (char *buffer, LARGE_INT number)
1128 snprintf (buffer, 24, LARGE_INT_FMT, number);
1131 /* The same as legible(), but works on LARGE_INT. */
1134 legible_large_int (LARGE_INT l)
1137 large_int_to_string (inbuf, l);
1138 return legible_1 (inbuf);
1141 /* Count the digits in a (long) integer. */
1143 numdigit (long number)
1151 while ((number /= 10) > 0)
1156 /* A half-assed implementation of INT_MAX on machines that don't
1157 bother to define one. */
1159 # define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))
1162 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1163 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1165 #define DIGITS_1(figure) ONE_DIGIT (figure)
1166 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1167 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1168 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1169 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1170 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1171 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1172 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1173 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1174 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1176 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1178 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1179 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1180 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1181 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1182 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1183 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1184 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1185 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1186 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1188 /* Print NUMBER to BUFFER in base 10. This should be completely
1189 equivalent to `sprintf(buffer, "%ld", number)', only much faster.
1191 The speedup may make a difference in programs that frequently
1192 convert numbers to strings. Some implementations of sprintf,
1193 particularly the one in GNU libc, have been known to be extremely
1194 slow compared to this function.
1196 Return the pointer to the location where the terminating zero was
1197 printed. (Equivalent to calling buffer+strlen(buffer) after the
1200 BUFFER should be big enough to accept as many bytes as you expect
1201 the number to take up. On machines with 64-bit longs the maximum
1202 needed size is 24 bytes. That includes the digits needed for the
1203 largest 64-bit number, the `-' sign in case it's negative, and the
1204 terminating '\0'. */
1207 number_to_string (char *buffer, long number)
1212 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1213 /* We are running in a strange or misconfigured environment. Let
1214 sprintf cope with it. */
1215 sprintf (buffer, "%ld", n);
1216 p += strlen (buffer);
1217 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1223 /* We cannot print a '-' and assign -n to n because -n would
1224 overflow. Let sprintf deal with this border case. */
1225 sprintf (buffer, "%ld", n);
1226 p += strlen (buffer);
1234 if (n < 10) { DIGITS_1 (1); }
1235 else if (n < 100) { DIGITS_2 (10); }
1236 else if (n < 1000) { DIGITS_3 (100); }
1237 else if (n < 10000) { DIGITS_4 (1000); }
1238 else if (n < 100000) { DIGITS_5 (10000); }
1239 else if (n < 1000000) { DIGITS_6 (100000); }
1240 else if (n < 10000000) { DIGITS_7 (1000000); }
1241 else if (n < 100000000) { DIGITS_8 (10000000); }
1242 else if (n < 1000000000) { DIGITS_9 (100000000); }
1243 #if SIZEOF_LONG == 4
1244 /* ``if (1)'' serves only to preserve editor indentation. */
1245 else if (1) { DIGITS_10 (1000000000); }
1246 #else /* SIZEOF_LONG != 4 */
1247 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1248 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1249 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1250 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1251 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1252 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1253 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1254 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1255 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1256 else { DIGITS_19 (1000000000000000000L); }
1257 #endif /* SIZEOF_LONG != 4 */
1260 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1266 #undef ONE_DIGIT_ADVANCE
1288 /* Support for timers. */
1290 #undef TIMER_WINDOWS
1291 #undef TIMER_GETTIMEOFDAY
1294 /* Depending on the OS and availability of gettimeofday(), one and
1295 only one of the above constants will be defined. Virtually all
1296 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1297 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1298 non-Windows systems without gettimeofday.
1300 #### Perhaps we should also support ftime(), which exists on old
1301 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1302 C, if memory serves me.) */
1305 # define TIMER_WINDOWS
1306 #else /* not WINDOWS */
1307 # ifdef HAVE_GETTIMEOFDAY
1308 # define TIMER_GETTIMEOFDAY
1312 #endif /* not WINDOWS */
1314 #ifdef TIMER_GETTIMEOFDAY
1315 typedef struct timeval wget_sys_time;
1319 typedef time_t wget_sys_time;
1322 #ifdef TIMER_WINDOWS
1323 typedef ULARGE_INTEGER wget_sys_time;
1327 /* The starting point in time which, subtracted from the current
1328 time, yields elapsed time. */
1329 wget_sys_time start;
1331 /* The most recent elapsed time, calculated by wtimer_elapsed().
1332 Measured in milliseconds. */
1333 double elapsed_last;
1335 /* Approximately, the time elapsed between the true start of the
1336 measurement and the time represented by START. */
1337 double elapsed_pre_start;
1340 /* Allocate a timer. It is not legal to do anything with a freshly
1341 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1344 wtimer_allocate (void)
1346 struct wget_timer *wt = xnew (struct wget_timer);
1350 /* Allocate a new timer and reset it. Return the new timer. */
1355 struct wget_timer *wt = wtimer_allocate ();
1360 /* Free the resources associated with the timer. Its further use is
1364 wtimer_delete (struct wget_timer *wt)
1369 /* Store system time to WST. */
1372 wtimer_sys_set (wget_sys_time *wst)
1374 #ifdef TIMER_GETTIMEOFDAY
1375 gettimeofday (wst, NULL);
1382 #ifdef TIMER_WINDOWS
1383 /* We use GetSystemTime to get the elapsed time. MSDN warns that
1384 system clock adjustments can skew the output of GetSystemTime
1385 when used as a timer and gives preference to GetTickCount and
1386 high-resolution timers. But GetTickCount can overflow, and hires
1387 timers are typically used for profiling, not for regular time
1388 measurement. Since we handle clock skew anyway, we just use
1392 GetSystemTime (&st);
1394 /* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy
1395 FILETIME to ULARGE_INTEGER, and use regular 64-bit integer
1396 arithmetic on that. */
1397 SystemTimeToFileTime (&st, &ft);
1398 wst->HighPart = ft.dwHighDateTime;
1399 wst->LowPart = ft.dwLowDateTime;
1403 /* Reset timer WT. This establishes the starting point from which
1404 wtimer_elapsed() will return the number of elapsed
1405 milliseconds. It is allowed to reset a previously used timer. */
1408 wtimer_reset (struct wget_timer *wt)
1410 /* Set the start time to the current time. */
1411 wtimer_sys_set (&wt->start);
1412 wt->elapsed_last = 0;
1413 wt->elapsed_pre_start = 0;
1417 wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)
1419 #ifdef TIMER_GETTIMEOFDAY
1420 return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000
1421 + (double)(wst1->tv_usec - wst2->tv_usec) / 1000);
1425 return 1000 * (*wst1 - *wst2);
1429 /* VC++ 6 doesn't support direct cast of uint64 to double. To work
1430 around this, we subtract, then convert to signed, then finally to
1432 return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;
1436 /* Return the number of milliseconds elapsed since the timer was last
1437 reset. It is allowed to call this function more than once to get
1438 increasingly higher elapsed values. These timers handle clock
1442 wtimer_elapsed (struct wget_timer *wt)
1447 wtimer_sys_set (&now);
1448 elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);
1450 /* Ideally we'd just return the difference between NOW and
1451 wt->start. However, the system timer can be set back, and we
1452 could return a value smaller than when we were last called, even
1453 a negative value. Both of these would confuse the callers, which
1454 expect us to return monotonically nondecreasing values.
1456 Therefore: if ELAPSED is smaller than its previous known value,
1457 we reset wt->start to the current time and effectively start
1458 measuring from this point. But since we don't want the elapsed
1459 value to start from zero, we set elapsed_pre_start to the last
1460 elapsed time and increment all future calculations by that
1463 if (elapsed < wt->elapsed_last)
1466 wt->elapsed_pre_start = wt->elapsed_last;
1467 elapsed = wt->elapsed_last;
1470 wt->elapsed_last = elapsed;
1474 /* Return the assessed granularity of the timer implementation, in
1475 milliseconds. This is used by code that tries to substitute a
1476 better value for timers that have returned zero. */
1479 wtimer_granularity (void)
1481 #ifdef TIMER_GETTIMEOFDAY
1482 /* Granularity of gettimeofday varies wildly between architectures.
1483 However, it appears that on modern machines it tends to be better
1484 than 1ms. Assume 100 usecs. (Perhaps the configure process
1485 could actually measure this?) */
1493 #ifdef TIMER_WINDOWS
1494 /* According to MSDN, GetSystemTime returns a broken-down time
1495 structure whose smallest member holds milliseconds. */
1500 /* This should probably be at a better place, but it doesn't really
1501 fit into html-parse.c. */
1503 /* The function returns the pointer to the malloc-ed quoted version of
1504 string s. It will recognize and quote numeric and special graphic
1505 entities, as per RFC1866:
1513 No other entities are recognized or replaced. */
1515 html_quote_string (const char *s)
1521 /* Pass through the string, and count the new size. */
1522 for (i = 0; *s; s++, i++)
1525 i += 4; /* `amp;' */
1526 else if (*s == '<' || *s == '>')
1527 i += 3; /* `lt;' and `gt;' */
1528 else if (*s == '\"')
1529 i += 5; /* `quot;' */
1533 res = (char *)xmalloc (i + 1);
1535 for (p = res; *s; s++)
1548 *p++ = (*s == '<' ? 'l' : 'g');
1575 /* Determine the width of the terminal we're running on. If that's
1576 not possible, return 0. */
1579 determine_screen_width (void)
1581 /* If there's a way to get the terminal size using POSIX
1582 tcgetattr(), somebody please tell me. */
1585 #else /* TIOCGWINSZ */
1589 if (opt.lfilename != NULL)
1592 fd = fileno (stderr);
1593 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1594 return 0; /* most likely ENOTTY */
1597 #endif /* TIOCGWINSZ */
1600 /* Return a random number between 0 and MAX-1, inclusive.
1602 If MAX is greater than the value of RAND_MAX+1 on the system, the
1603 returned value will be in the range [0, RAND_MAX]. This may be
1604 fixed in a future release.
1606 The random number generator is seeded automatically the first time
1609 This uses rand() for portability. It has been suggested that
1610 random() offers better randomness, but this is not required for
1611 Wget, so I chose to go for simplicity and use rand
1614 DO NOT use this for cryptographic purposes. It is only meant to be
1615 used in situations where quality of the random numbers returned
1616 doesn't really matter. */
1619 random_number (int max)
1627 srand (time (NULL));
1632 /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
1633 and enforce that assumption by masking other bits. */
1635 # define RAND_MAX 32767
1639 /* This is equivalent to rand() % max, but uses the high-order bits
1640 for better randomness on architectures where rand() is implemented
1641 using a simple congruential generator. */
1643 bounded = (double)max * rnd / (RAND_MAX + 1.0);
1644 return (int)bounded;
1647 /* Return a random uniformly distributed floating point number in the
1648 [0, 1) range. The precision of returned numbers is 9 digits.
1650 Modify this to use erand48() where available! */
1655 /* We can't rely on any specific value of RAND_MAX, but I'm pretty
1656 sure it's greater than 1000. */
1657 int rnd1 = random_number (1000);
1658 int rnd2 = random_number (1000);
1659 int rnd3 = random_number (1000);
1660 return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
1664 /* A debugging function for checking whether an MD5 library works. */
1666 #include "gen-md5.h"
1669 debug_test_md5 (char *buf)
1671 unsigned char raw[16];
1672 static char res[33];
1676 ALLOCA_MD5_CONTEXT (ctx);
1679 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1680 gen_md5_finish (ctx, raw);
1687 *p2++ = XNUM_TO_digit (*p1 >> 4);
1688 *p2++ = XNUM_TO_digit (*p1 & 0xf);
1697 /* Implementation of run_with_timeout, a generic timeout-forcing
1698 routine for systems with Unix-like signal handling. */
1700 #ifdef USE_SIGNAL_TIMEOUT
1701 # ifdef HAVE_SIGSETJMP
1702 # define SETJMP(env) sigsetjmp (env, 1)
1704 static sigjmp_buf run_with_timeout_env;
1707 abort_run_with_timeout (int sig)
1709 assert (sig == SIGALRM);
1710 siglongjmp (run_with_timeout_env, -1);
1712 # else /* not HAVE_SIGSETJMP */
1713 # define SETJMP(env) setjmp (env)
1715 static jmp_buf run_with_timeout_env;
1718 abort_run_with_timeout (int sig)
1720 assert (sig == SIGALRM);
1721 /* We don't have siglongjmp to preserve the set of blocked signals;
1722 if we longjumped out of the handler at this point, SIGALRM would
1723 remain blocked. We must unblock it manually. */
1724 int mask = siggetmask ();
1725 mask &= ~sigmask (SIGALRM);
1728 /* Now it's safe to longjump. */
1729 longjmp (run_with_timeout_env, -1);
1731 # endif /* not HAVE_SIGSETJMP */
1733 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses
1734 setitimer where available, alarm otherwise.
1736 TIMEOUT should be non-zero. If the timeout value is so small that
1737 it would be rounded to zero, it is rounded to the least legal value
1738 instead (1us for setitimer, 1s for alarm). That ensures that
1739 SIGALRM will be delivered in all cases. */
1742 alarm_set (double timeout)
1745 /* Use the modern itimer interface. */
1746 struct itimerval itv;
1748 itv.it_value.tv_sec = (long) timeout;
1749 itv.it_value.tv_usec = 1000000L * (timeout - (long)timeout);
1750 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1751 /* Ensure that we wait for at least the minimum interval.
1752 Specifying zero would mean "wait forever". */
1753 itv.it_value.tv_usec = 1;
1754 setitimer (ITIMER_REAL, &itv, NULL);
1755 #else /* not ITIMER_REAL */
1756 /* Use the old alarm() interface. */
1757 int secs = (int) timeout;
1759 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is
1760 because alarm(0) means "never deliver the alarm", i.e. "wait
1761 forever", which is not what someone who specifies a 0.5s
1762 timeout would expect. */
1765 #endif /* not ITIMER_REAL */
1768 /* Cancel the alarm set with alarm_set. */
1774 struct itimerval disable;
1776 setitimer (ITIMER_REAL, &disable, NULL);
1777 #else /* not ITIMER_REAL */
1779 #endif /* not ITIMER_REAL */
1782 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1783 seconds. Returns non-zero if the function was interrupted with a
1784 timeout, zero otherwise.
1786 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1787 using setitimer() or alarm(). The timeout is enforced by
1788 longjumping out of the SIGALRM handler. This has several
1789 advantages compared to the traditional approach of relying on
1790 signals causing system calls to exit with EINTR:
1792 * The callback function is *forcibly* interrupted after the
1793 timeout expires, (almost) regardless of what it was doing and
1794 whether it was in a syscall. For example, a calculation that
1795 takes a long time is interrupted as reliably as an IO
1798 * It works with both SYSV and BSD signals because it doesn't
1799 depend on the default setting of SA_RESTART.
1801 * It doesn't require special handler setup beyond a simple call to
1802 signal(). (It does use sigsetjmp/siglongjmp, but they're
1805 The only downside is that, if FUN allocates internal resources that
1806 are normally freed prior to exit from the function, they will be
1807 lost in case of timeout. */
1810 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1820 signal (SIGALRM, abort_run_with_timeout);
1821 if (SETJMP (run_with_timeout_env) != 0)
1823 /* Longjumped out of FUN with a timeout. */
1824 signal (SIGALRM, SIG_DFL);
1827 alarm_set (timeout);
1830 /* Preserve errno in case alarm() or signal() modifies it. */
1831 saved_errno = errno;
1833 signal (SIGALRM, SIG_DFL);
1834 errno = saved_errno;
1839 #else /* not USE_SIGNAL_TIMEOUT */
1842 /* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1843 define it under Windows, because Windows has its own version of
1844 run_with_timeout that uses threads. */
1847 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1852 #endif /* not WINDOWS */
1853 #endif /* not USE_SIGNAL_TIMEOUT */